diff --git a/Extras/RigidBodyGpuPipeline/bin/glut32.dll b/Extras/RigidBodyGpuPipeline/bin/glut32.dll new file mode 100644 index 000000000..3297bd078 Binary files /dev/null and b/Extras/RigidBodyGpuPipeline/bin/glut32.dll differ diff --git a/Extras/RigidBodyGpuPipeline/bin/glut64.dll b/Extras/RigidBodyGpuPipeline/bin/glut64.dll new file mode 100644 index 000000000..5df6d9885 Binary files /dev/null and b/Extras/RigidBodyGpuPipeline/bin/glut64.dll differ diff --git a/Extras/RigidBodyGpuPipeline/build/findDirectX11.lua b/Extras/RigidBodyGpuPipeline/build/findDirectX11.lua new file mode 100644 index 000000000..68771c4a0 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/build/findDirectX11.lua @@ -0,0 +1,36 @@ +function findDirectX11() + local dx11path = os.getenv("DXSDK_DIR") + if (dx11path) then + local filepath = string.format("%s%s",dx11path,"Include/D3D11.h") + headerdx11 = io.open(filepath, "r") + if (headerdx11) then + printf("Found DX11: '%s'", filepath) + return true + end + end + return false + end + +function initDirectX11() + configuration {} + + local dx11path = os.getenv("DXSDK_DIR") + defines { "ADL_ENABLE_DX11"} + includedirs {"$(DXSDK_DIR)/include"} + + configuration "x32" + libdirs {"$(DXSDK_DIR)/Lib/x86"} + configuration "x64" + libdirs {"$(DXSDK_DIR)/Lib/x64"} + configuration {} + links {"d3dcompiler", + "dxerr", + "dxguid", + "d3dx9", + "d3d9", + "winmm", + "comctl32", + "d3dx11" + } + return true +end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/build/findOpenCL.lua b/Extras/RigidBodyGpuPipeline/build/findOpenCL.lua new file mode 100644 index 000000000..913b8406d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/build/findOpenCL.lua @@ -0,0 +1,84 @@ + -- todo: add Apple OpenCL environment vars + + function findOpenCL_AMD() + local amdopenclpath = os.getenv("AMDAPPSDKROOT") + if (amdopenclpath) then + return true + end + return false + end + + function findOpenCL_NVIDIA() + local nvidiaopenclpath = os.getenv("CUDA_PATH") + if 
(nvidiaopenclpath) then + return true + end + return false + end + + function findOpenCL_Intel() + local intelopenclpath = os.getenv("INTELOCLSDKROOT") + if (intelopenclpath) then + return true + end + return false + end + + function initOpenCL_AMD() + configuration {} + local amdopenclpath = os.getenv("AMDAPPSDKROOT") + if (amdopenclpath) then + defines { "ADL_ENABLE_CL" , "CL_PLATFORM_AMD"} + includedirs { + "$(AMDAPPSDKROOT)/include" + } + configuration "x32" + libdirs {"$(AMDAPPSDKROOT)/lib/x86"} + configuration "x64" + libdirs {"$(AMDAPPSDKROOT)/lib/x86_64"} + configuration {} + links {"OpenCL"} + return true + end + return false + end + + + function initOpenCL_NVIDIA() + configuration {} + local nvidiaopenclpath = os.getenv("CUDA_PATH") + if (nvidiaopenclpath) then + defines { "ADL_ENABLE_CL" , "CL_PLATFORM_NVIDIA"} + includedirs { + "$(CUDA_PATH)/include" + } + configuration "x32" + libdirs {"$(CUDA_PATH)/lib/Win32"} + configuration "x64" + libdirs {"$(CUDA_PATH)/lib/x64"} + configuration {} + links {"OpenCL"} + return true + end + return false + end + + function initOpenCL_Intel() + configuration {} + local intelopenclpath = os.getenv("INTELOCLSDKROOT") + if (intelopenclpath) then + defines { "ADL_ENABLE_CL" , "CL_PLATFORM_INTEL"} + includedirs { + "$(INTELOCLSDKROOT)/include" + } + configuration "x32" + libdirs {"$(INTELOCLSDKROOT)/lib/x86"} + configuration "x64" + libdirs {"$(INTELOCLSDKROOT)/lib/x64"} + configuration {} + links {"OpenCL"} + return true + end + return false + end + \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/build/findOpenGLGlewGlut.lua b/Extras/RigidBodyGpuPipeline/build/findOpenGLGlewGlut.lua new file mode 100644 index 000000000..2a04c6d70 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/build/findOpenGLGlewGlut.lua @@ -0,0 +1,52 @@ + -- todo: add Apple OpenCL environment vars + + function initOpenGL() + configuration {} + configuration {"Windows"} + links {"opengl32"} + configuration {"MacOSX"} + links { 
"Carbon.framework","OpenGL.framework","AGL.framework"} + configuration {"not Windows", "not MacOSX"} + links {"GL","GLU"} + configuration{} + end + + function initGlut() + configuration {} + configuration {"Windows"} + + includedirs { + projectRootDir .. "../../Glut" + } + libdirs { projectRootDir .. "../../Glut"} + configuration {"Windows", "x32"} + links {"glut32"} + configuration {"Windows", "x64"} + links {"glut64"} + + configuration {"MacOSX"} + links { "Glut.framework" } + + configuration {"not Windows", "not MacOSX"} + links {"glut"} + configuration{} + end + + function initGlew() + configuration {} + configuration {"Windows"} + defines { "GLEW_STATIC"} + includedirs { + projectRootDir .. "../../Glut" + } + libdirs { projectRootDir .. "../../Glut"} + configuration {"Windows", "x32"} + links {"glew32s"} + configuration {"Windows", "x64"} + links {"glew64s"} + + configuration{} + end + + + diff --git a/Extras/RigidBodyGpuPipeline/build/premake4.lua b/Extras/RigidBodyGpuPipeline/build/premake4.lua new file mode 100644 index 000000000..b317d35f7 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/build/premake4.lua @@ -0,0 +1,55 @@ +solution "0MySolution" + + -- Multithreaded compiling + if _ACTION == "vs2010" then + buildoptions { "/MP" } + end + + + + configurations {"Release", "Debug"} + configuration "Release" + flags { "Optimize", "StaticRuntime", "NoMinimalRebuild", "FloatFast"} + configuration "Debug" + flags { "Symbols", "StaticRuntime" , "NoMinimalRebuild", "NoEditAndContinue" ,"FloatFast"} + + platforms {"x32", "x64"} + + configuration "x64" + targetsuffix "_64" + configuration {"x64", "debug"} + targetsuffix "_x64_debug" + configuration {"x64", "release"} + targetsuffix "_x64" + configuration {"x32", "debug"} + targetsuffix "_debug" + + configuration{} + + flags { "NoRTTI", "NoExceptions"} + defines { "_HAS_EXCEPTIONS=0" } + targetdir "../bin" + location("./" .. _ACTION) + + + projectRootDir = os.getcwd() .. "/../" + print("Project root directroy: " .. 
projectRootDir); + + dofile ("findOpenCL.lua") + dofile ("findDirectX11.lua") + dofile ("findOpenGLGlewGlut.lua") + + language "C++" + + include "../opencl/gpu_rigidbody_pipeline2" + include "../opencl/gpu_rigidbody_pipeline" + + include "../opencl/basic_initialize" + include "../opencl/vector_add" + + include "../opencl/primitives/AdlTest" + include "../opencl/primitives/benchmark" + include "../opencl/3dGridBroadphase" + include "../opencl/broadphase_benchmark" + + \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/build/vs2008.bat b/Extras/RigidBodyGpuPipeline/build/vs2008.bat new file mode 100644 index 000000000..02665f970 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/build/vs2008.bat @@ -0,0 +1,10 @@ + +rem premake4 --no-pelibs vs2008 +rem premake4 --no-pedemos vs2008 +rem premake4 --no-bulletlibs --no-pelibs vs2008 +rem premake4 --with-nacl vs2008 + +..\..\..\msvc\premake4 vs2008 +mkdir vs2008\cache + +pause \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/build/vs2010.bat b/Extras/RigidBodyGpuPipeline/build/vs2010.bat new file mode 100644 index 000000000..9122bab9a --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/build/vs2010.bat @@ -0,0 +1,5 @@ + +..\..\..\msvc\premake4 vs2010 + +mkdir vs2010\cache +pause \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/AMD/premake4.lua b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/AMD/premake4.lua new file mode 100644 index 000000000..159e25fbc --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/AMD/premake4.lua @@ -0,0 +1,45 @@ +if os.is("Windows") then + + hasCL = findOpenCL_AMD() + + if (hasCL) then + + project "basic_bullet2_demo_AMD" + + initOpenCL_AMD() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + + includedirs { + "..", + "../../../bullet2", + "../../testbed", + "../../../rendering/Gwen", + "../../../opencl/basic_initialize", + "../../../opencl/primitives" + } + + + links { "testbed", + 
"bullet2", + "gwen" + } + + + initOpenGL() + initGlut() + + + files { + "../**.cpp", + "../**.h", + "../../../opencl/basic_initialize/btOpenCLUtils.cpp", + "../../../opencl/basic_initialize/btOpenCLUtils.h" + } + + end + +end diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/BasicDemo.cpp b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/BasicDemo.cpp new file mode 100644 index 000000000..0b6d452ac --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/BasicDemo.cpp @@ -0,0 +1,538 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "BasicDemo.h" +#include "GlutStuff.h" +///btBulletDynamicsCommon.h is the main Bullet include file, contains most common include files. 
+#include "btBulletDynamicsCommon.h" +#include "CustomConvexShape.h" +#include "CustomConvexPairCollision.h" +#include "CustomCollisionDispatcher.h" + +#include "ConvexHeightFieldShape.h" +#include "GLDebugDrawer.h" +static GLDebugDrawer sDebugDraw; + +#include //printf debugging + +#ifdef CL_PLATFORM_AMD +#include "../../opencl/basic_initialize/btOpenCLUtils.h" + +cl_context g_cxMainContext=0; +cl_command_queue g_cqCommandQue=0; +cl_device_id g_clDevice=0; +#endif + +///create 125 (5x5x5) dynamic object +#define ARRAY_SIZE_X 6 +#define ARRAY_SIZE_Y 6 +#define ARRAY_SIZE_Z 4 + +//maximum number of objects (and allow user to shoot additional boxes) +#define MAX_PROXIES (ARRAY_SIZE_X*ARRAY_SIZE_Y*ARRAY_SIZE_Z + 1024) + +///scaling of the objects (0.1 = 20 centimeter boxes ) +#define SCALING 1. +#define START_POS_X 0 +#define START_POS_Y -0.8 +#define START_POS_Z 0 + +#define BoxVtxCount 8 + +static float BoxVtx[] = { +-0.5,-0.5,-0.5, +-0.5,-0.5,0.5, +-0.5,0.5,-0.5, +-0.5,0.5,0.5, +0.5,-0.5,-0.5, +0.5,-0.5,0.5, +0.5,0.5,-0.5, +0.5,0.5,0.5, +}; + +static float BoxVtx2[] = { +-20.3,-10.3,-20.3, +-20.3,-10.3,20.3, +-20.3,10.3,-20.3, +-20.3,10.3,20.3, +20.3,-10.3,-20.3, +20.3,-10.3,20.3, +20.3,10.3,-20.3, +20.3,10.3,20.3, +}; + + +#define BarrelVtxCount2 57 + +static float BarrelVtx2[] = { +0.0f,-0.5f,0.0f, 0.0f,-1.0f,0.0f, +0.282362f,-0.5f,-0.205148f, 0.0f,-1.0f,0.0f, +0.349018f,-0.5f,0.0f, 0.0f,-1.0f,0.0f, +0.107853f,-0.5f,-0.331936f, 0.0f,-1.0f,0.0f, +-0.107853f,-0.5f,-0.331936f, 0.0f,-1.0f,0.0f, +0.107853f,-0.5f,-0.331936f, 0.0f,-1.0f,0.0f, +-0.282362f,-0.5f,-0.205148f, 0.0f,-1.0f,0.0f, +-0.349018f,-0.5f,0.0f, 0.0f,-1.0f,0.0f, +-0.282362f,-0.5f,0.205148f, 0.0f,-1.0f,0.0f, +-0.107853f,-0.5f,0.331936f, 0.0f,-1.0f,0.0f, +0.107853f,-0.5f,0.331936f, 0.0f,-1.0f,0.0f, +0.282362f,-0.5f,0.205148f, 0.0f,-1.0f,0.0f, +0.0f,0.5f,0.0f, 0.0f,1.0f,0.0f, +0.349018f,0.5f,0.0f, 0.0f,1.0f,0.0f, +0.282362f,0.5f,-0.205148f, 0.0f,1.0f,0.0f, +0.107853f,0.5f,-0.331936f, 0.0f,1.0f,0.0f, 
+0.107853f,0.5f,-0.331936f, 0.0f,1.0f,0.0f, +-0.107853f,0.5f,-0.331936f, 0.0f,1.0f,0.0f, +-0.282362f,0.5f,-0.205148f, 0.0f,1.0f,0.0f, +-0.349018f,0.5f,0.0f, 0.0f,1.0f,0.0f, +-0.282362f,0.5f,0.205148f, 0.0f,1.0f,0.0f, +-0.107853f,0.5f,0.331936f, 0.0f,1.0f,0.0f, +0.107853f,0.5f,0.331936f, 0.0f,1.0f,0.0f, +0.282362f,0.5f,0.205148f, 0.0f,1.0f,0.0f, +0.349018f,-0.5f,0.0f, 0.957307f,-0.289072f,0.0f, +0.404509f,0.0f,-0.293893f, 0.809017f,0.0f,-0.587785f, +0.5f,0.0f,0.0f, 1.0f,0.0f,0.0f, +0.282362f,-0.5f,-0.205148f, 0.774478f,-0.289072f,-0.562691f, +0.154508f,0.0f,-0.475528f, 0.309017f,0.0f,-0.951057f, +0.107853f,-0.5f,-0.331936f, 0.295824f,-0.289072f,-0.910453f, +0.107853f,-0.5f,-0.331936f, 0.295824f,-0.289072f,-0.910453f, +-0.154509f,0.0f,-0.475528f, -0.309017f,0.0f,-0.951057f, +0.154508f,0.0f,-0.475528f, 0.309017f,0.0f,-0.951057f, +-0.107853f,-0.5f,-0.331936f, -0.295824f,-0.289072f,-0.910453f, +-0.404509f,0.0f,-0.293893f, -0.809017f,0.0f,-0.587785f, +-0.282362f,-0.5f,-0.205148f, -0.774478f,-0.289072f,-0.562691f, +-0.5f,0.0f,0.0f, -1.0f,0.0f,0.0f, +-0.349018f,-0.5f,0.0f, -0.957307f,-0.289072f,0.0f, +-0.404508f,0.0f,0.293893f, -0.809017f,0.0f,0.587785f, +-0.282362f,-0.5f,0.205148f, -0.774478f,-0.289072f,0.562691f, +-0.154509f,0.0f,0.475528f, -0.309017f,0.0f,0.951056f, +-0.107853f,-0.5f,0.331936f, -0.295824f,-0.289072f,0.910453f, +0.154509f,0.0f,0.475528f, 0.309017f,0.0f,0.951056f, +0.107853f,-0.5f,0.331936f, 0.295824f,-0.289072f,0.910453f, +0.404509f,0.0f,0.293892f, 0.809017f,0.0f,0.587785f, +0.282362f,-0.5f,0.205148f, 0.774478f,-0.289072f,0.562691f, +0.282362f,0.5f,-0.205148f, 0.774478f,0.289072f,-0.562691f, +0.349018f,0.5f,0.0f, 0.957307f,0.289072f,0.0f, +0.107853f,0.5f,-0.331936f, 0.295824f,0.289072f,-0.910453f, +-0.107853f,0.5f,-0.331936f, -0.295824f,0.289072f,-0.910453f, +0.107853f,0.5f,-0.331936f, 0.295824f,0.289072f,-0.910453f, +-0.282362f,0.5f,-0.205148f, -0.774478f,0.289072f,-0.562691f, +-0.349018f,0.5f,0.0f, -0.957307f,0.289072f,0.0f, +-0.282362f,0.5f,0.205148f, 
-0.774478f,0.289072f,0.562691f, +-0.107853f,0.5f,0.331936f, -0.295824f,0.289072f,0.910453f, +0.107853f,0.5f,0.331936f, 0.295824f,0.289072f,0.910453f, +0.282362f,0.5f,0.205148f, 0.774478f,0.289072f,0.562691f, +}; + + +static int BarrelIdx[] = { +0,1,2, +0,3,1, +0,4,5, +0,6,4, +0,7,6, +0,8,7, +0,9,8, +0,10,9, +0,11,10, +0,2,11, +12,13,14, +12,14,15, +12,16,17, +12,17,18, +12,18,19, +12,19,20, +12,20,21, +12,21,22, +12,22,23, +12,23,13, +24,25,26, +24,27,25, +27,28,25, +27,29,28, +30,31,32, +30,33,31, +33,34,31, +33,35,34, +35,36,34, +35,37,36, +37,38,36, +37,39,38, +39,40,38, +39,41,40, +41,42,40, +41,43,42, +43,44,42, +43,45,44, +45,26,44, +45,24,26, +26,46,47, +26,25,46, +25,48,46, +25,28,48, +32,49,50, +32,31,49, +31,51,49, +31,34,51, +34,52,51, +34,36,52, +36,53,52, +36,38,53, +38,54,53, +38,40,54, +40,55,54, +40,42,55, +42,56,55, +42,44,56, +44,47,56, +44,26,47, +}; + + +__inline void glVertexFloat4( const float4& v ) +{ + glVertex3f( v.x, v.y, v.z ); +} + +__inline void drawPointListTransformed(const float4* vtx, int nVtx, const float4& translation, const Quaternion& quat) +{ + glPushMatrix(); + + Matrix3x3 rotMat = mtTranspose( qtGetRotationMatrix( quat ) ); + float transformMat[16] = + { + rotMat.m_row[0].x, rotMat.m_row[0].y, rotMat.m_row[0].z, 0, + rotMat.m_row[1].x, rotMat.m_row[1].y, rotMat.m_row[1].z, 0, + rotMat.m_row[2].x, rotMat.m_row[2].y, rotMat.m_row[2].z, 0, + translation.x, translation.y, translation.z,1 + }; + + glMultMatrixf( transformMat ); + + float4 c = make_float4(1,1,0,0); + + glPointSize(3.f); + glBegin(GL_POINTS); + for(int i=0; igetDebugDrawer()->getDebugMode()& btIDebugDraw::DBG_DrawContactPoints) + for (int i=0;igetCollisionObjectArray().size();i++) + { + btCollisionObject* ob = m_dynamicsWorld->getCollisionObjectArray()[i]; + if (ob->getCollisionShape()->getShapeType() == CUSTOM_POLYHEDRAL_SHAPE_TYPE) + { + CustomConvexShape* customConvex = (CustomConvexShape*)ob->getCollisionShape(); + ConvexHeightField* cvxShape= 
customConvex->m_ConvexHeightField; + if (!cvxShape) + { + printf("aargh\n"); + } + + float4 bodyApos; + Quaternion bodyAquat; + + + const btVector3& pA = ob->getWorldTransform().getOrigin(); + btQuaternion qA = ob->getWorldTransform().getRotation(); + + bodyApos.x = pA.getX(); + bodyApos.y = pA.getY(); + bodyApos.z = pA.getZ(); + bodyApos.w = 0.f; + bodyAquat.x = qA.getX(); + bodyAquat.y = qA.getY(); + bodyAquat.z = qA.getZ(); + bodyAquat.w = qA.getW(); + + + displaySamples(cvxShape->getSamplePoints(),cvxShape->getNumSamplePoints(),bodyApos,bodyAquat); + + } + + } +} +void BasicDemo::clientMoveAndDisplay() +{ + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + //simple dynamics world doesn't handle fixed-time-stepping + float ms = getDeltaTimeMicroseconds(); + + ///step the simulation + if (m_dynamicsWorld) + { + m_dynamicsWorld->stepSimulation(ms / 1000000.f); + //optional but useful: debug drawing + m_dynamicsWorld->debugDrawWorld(); + } + + renderme(); + + renderSurfacePoints(); + + + glFlush(); + + swapBuffers(); + +} + + + +void BasicDemo::displayCallback(void) { + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + renderme(); + + renderSurfacePoints(); + + //optional but useful: debug drawing to detect problems + if (m_dynamicsWorld) + m_dynamicsWorld->debugDrawWorld(); + + glFlush(); + swapBuffers(); +} + + + + + +void BasicDemo::initPhysics() +{ + setTexturing(true); + setShadows(true); + + m_acceleratedRigidBodies = 0; + + setCameraDistance(btScalar(SCALING*20.)); + + ///collision configuration contains default setup for memory, collision setup + m_collisionConfiguration = new btDefaultCollisionConfiguration(); + //m_collisionConfiguration->setConvexConvexMultipointIterations(); + + ///use the default collision dispatcher. 
For parallel processing you can use a diffent dispatcher (see Extras/BulletMultiThreaded) + m_dispatcher = new btCollisionDispatcher(m_collisionConfiguration); + + +#ifdef CL_PLATFORM_AMD + m_dispatcher = new CustomCollisionDispatcher(m_collisionConfiguration, g_cxMainContext,g_clDevice,g_cqCommandQue); +#else + m_dispatcher = new CustomCollisionDispatcher(m_collisionConfiguration); +#endif + + m_dispatcher->registerCollisionCreateFunc(CUSTOM_POLYHEDRAL_SHAPE_TYPE,CUSTOM_POLYHEDRAL_SHAPE_TYPE,new CustomConvexConvexPairCollision::CreateFunc(m_collisionConfiguration->getSimplexSolver(), m_collisionConfiguration->getPenetrationDepthSolver())); + + m_broadphase = new btDbvtBroadphase(); + + ///the default constraint solver. For parallel processing you can use a different solver (see Extras/BulletMultiThreaded) + btSequentialImpulseConstraintSolver* sol = new btSequentialImpulseConstraintSolver; + m_solver = sol; + + m_dynamicsWorld = new btDiscreteDynamicsWorld(m_dispatcher,m_broadphase,m_solver,m_collisionConfiguration); + + m_dynamicsWorld->setGravity(btVector3(0,-10,0)); + + m_dynamicsWorld->setDebugDrawer(&sDebugDraw); + + ///create a few basic rigid bodies + //btCollisionShape* groundShape = new btBoxShape(btVector3(btScalar(50.),btScalar(50.),btScalar(50.))); +#if 1 + CustomConvexShape* groundShape = new CustomConvexShape(BoxVtx2,BoxVtxCount,3*sizeof(float)); + //btCollisionShape* groundShape = new btStaticPlaneShape(btVector3(0,1,0),0); + + m_collisionShapes.push_back(groundShape); + + btTransform groundTransform; + groundTransform.setIdentity(); + groundTransform.setOrigin(btVector3(0,-11,0)); + + //We can also use DemoApplication::localCreateRigidBody, but for clarity it is provided here: + { + btScalar mass(0.); + + //rigidbody is dynamic if and only if mass is non zero, otherwise static + bool isDynamic = (mass != 0.f); + + btVector3 localInertia(0,0,0); + if (isDynamic) + groundShape->calculateLocalInertia(mass,localInertia); + + //using motionstate is 
recommended, it provides interpolation capabilities, and only synchronizes 'active' objects + btDefaultMotionState* myMotionState = new btDefaultMotionState(groundTransform); + btRigidBody::btRigidBodyConstructionInfo rbInfo(mass,myMotionState,groundShape,localInertia); + btRigidBody* body = new btRigidBody(rbInfo); + + //add the body to the dynamics world + m_dynamicsWorld->addRigidBody(body); + } +#endif + + + { + //create a few dynamic rigidbodies + // Re-using the same collision is better for memory usage and performance + + //btCollisionShape* colShape = new btBoxShape(btVector3(SCALING*1,SCALING*1,SCALING*1)); + //btCollisionShape* colShape = new btSphereShape(btScalar(1.)); +#define USE_CUSTOM_HEIGHTFIELD_SHAPE +#ifdef USE_CUSTOM_HEIGHTFIELD_SHAPE + CustomConvexShape* colShape = new CustomConvexShape(BarrelVtx2,BarrelVtxCount2,6*sizeof(float)); + + //CustomConvexShape* colShape = new CustomConvexShape(BoxVtx,BoxVtxCount,3*sizeof(float)); +#else + btConvexHullShape* colShape = new btConvexHullShape(BarrelVtx2,BarrelVtxCount2,6*sizeof(float)); + colShape->setLocalScaling(btVector3(0.9,0.9,0.9)); + +#endif //USE_CUSTOM_HEIGHTFIELD_SHAPE + btScalar scale = 0.5f; + + //btScalar scale = 1.f; + + //next line is already called inside the CustomConvexShape constructor + //colShape->initializePolyhedralFeatures(); + + m_collisionShapes.push_back(colShape); + + /// Create Dynamic Objects + btTransform startTransform; + startTransform.setIdentity(); + + btScalar mass(1.f); + + //rigidbody is dynamic if and only if mass is non zero, otherwise static + bool isDynamic = (mass != 0.f); + + btVector3 localInertia(0,0,0); + if (isDynamic) + colShape->calculateLocalInertia(mass,localInertia); + + float start_x = START_POS_X - ARRAY_SIZE_X/2; + float start_y = START_POS_Y; + float start_z = START_POS_Z - ARRAY_SIZE_Z/2; + + for (int k=0;k0) && ((j<2) || (j>(ARRAY_SIZE_Z-3)))) + // continue; + // if ((k>0) && ((i<2) || (i>(ARRAY_SIZE_X-3)))) + // continue; + + 
startTransform.setOrigin(SCALING*btVector3( + btScalar(scale*2.0*i + start_x), + btScalar(scale*1+scale*2.0*k + start_y), + btScalar(scale*2.0*j + start_z))); + + + //using motionstate is recommended, it provides interpolation capabilities, and only synchronizes 'active' objects + btDefaultMotionState* myMotionState = new btDefaultMotionState(startTransform); + btRigidBody* body=0; + + if (0)//k==0) + { + btVector3 zeroInertia(0,0,0); + btRigidBody::btRigidBodyConstructionInfo rbInfo(0.f,myMotionState,colShape,zeroInertia); + body = new btRigidBody(rbInfo); + } else + { + btRigidBody::btRigidBodyConstructionInfo rbInfo(mass,myMotionState,colShape,localInertia); + body = new btRigidBody(rbInfo); + } + + //m_acceleratedRigidBodies is used as a mapping to the accelerated rigid body index + body->setCompanionId(m_acceleratedRigidBodies++); + m_dynamicsWorld->addRigidBody(body); + + } + } + } + } + } + + +} +void BasicDemo::clientResetScene() +{ + exitPhysics(); + initPhysics(); +} + + +void BasicDemo::exitPhysics() +{ + + //cleanup in the reverse order of creation/initialization + + //remove the rigidbodies from the dynamics world and delete them + int i; + for (i=m_dynamicsWorld->getNumCollisionObjects()-1; i>=0 ;i--) + { + btCollisionObject* obj = m_dynamicsWorld->getCollisionObjectArray()[i]; + btRigidBody* body = btRigidBody::upcast(obj); + if (body && body->getMotionState()) + { + delete body->getMotionState(); + } + m_dynamicsWorld->removeCollisionObject( obj ); + delete obj; + } + + //delete collision shapes + for (int j=0;j m_collisionShapes; + + btBroadphaseInterface* m_broadphase; + + btCollisionDispatcher* m_dispatcher; + + btConstraintSolver* m_solver; + + btDefaultCollisionConfiguration* m_collisionConfiguration; + + int m_acceleratedRigidBodies; + + public: + + BasicDemo() + { + } + virtual ~BasicDemo() + { + exitPhysics(); + } + void initPhysics(); + + void exitPhysics(); + + virtual void clientMoveAndDisplay(); + + virtual void displayCallback(); + 
virtual void clientResetScene(); + + static DemoApplication* Create() + { + BasicDemo* demo = new BasicDemo; + demo->myinit(); + demo->initPhysics(); + return demo; + } + + void renderSurfacePoints(); + + +}; + +#endif //BASIC_DEMO_H + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/ConvexHeightFieldShape.cpp b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/ConvexHeightFieldShape.cpp new file mode 100644 index 000000000..e5e0d649e --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/ConvexHeightFieldShape.cpp @@ -0,0 +1,507 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#include "ConvexHeightFieldShape.h" +#include "Stubs/AdlCollideUtils.h" +#include "CubeMapUtils.h" +//#include +//#include +//#include "GlutStuff.h" + +//#define USE_OLD + +ConvexHeightField::ConvexHeightField(const float4* vtxBuffer, const int4* idxBuffer, int nTriangles) +: CollisionShape( SHAPE_CONVEX_HEIGHT_FIELD ) +{ + create( vtxBuffer, idxBuffer, nTriangles ); +} + +void ConvexHeightField::create( const float4* vtxBuffer, const int4* idxBuffer, int nTriangles ) +{ + { + float maxDx2 = -1.f; + int maxIdx = -1; + for(int i=0; i maxDx2 ) + { + maxDx2 = dx2; + maxIdx = idx.s[j]; + } + } + } + ADLASSERT( maxIdx != -1 ); + m_scale = sqrtf( maxDx2 ); + } + + // cast ray to find intersectPlaneLineions + { + for(u32 faceIdx=0; faceIdx<6; faceIdx++) + { + for(int i=0; i 0.f ) + { + minFraction = min2( minFraction, fraction ); // todo. have to check if this is the min to replace normal? + float4 ab = vtxBuffer[idxBuffer[itri].y]-vtxBuffer[idxBuffer[itri].x]; + float4 ac = vtxBuffer[idxBuffer[itri].z]-vtxBuffer[idxBuffer[itri].x]; + minNormal = cross3( ab, ac ); + minBCrd = bCrd; + } + } + + if( minFraction == FLT_MAX ) + minFraction = 0.f; + + { + u8 quantizedHeight = (u8)(minFraction*255.f); + sample( (Face)faceIdx, i,j ) = quantizedHeight; + sampleNormal( (Face)faceIdx, i,j ) = normalize3(minNormal); + float minValue = 3.f*(1.f/3.f)*(1.f/3.f); + sampleNormal( (Face)faceIdx, i,j ).w = (dot3F4( minBCrd, minBCrd ) - minValue )/(1.f-minValue); + } + } + } + } + + calcSamplePoints( m_samplePoints ); + + // calc support height using m_samplePoints + { + for(u32 faceIdx=0; faceIdx<6; faceIdx++) for(int i=0; i maxHeight ) maxHeight = h; + } + + { + u8 quantizedHeight = min2((u8)(maxHeight*255.f)+1, 255); + sampleSupport( (Face)faceIdx, i, j ) = quantizedHeight; + } + } + } + + m_aabb.setEmpty(); + for(int i=0; i 0.f ) + { + if( fraction < minFraction ) + { + minFraction = fraction; + minNormal = iEqn; + } + } + } + + 
ADLASSERT( minFraction != FLT_MAX ); + + minNormal.w = minFraction; + sampleNormal( (Face)faceIdx, i, j ) = minNormal; + } + } + } + + { + m_scale = -FLT_MAX; + for(u32 faceIdx=0; faceIdx<6; faceIdx++) + { + for(int i=0; i1.f) + h=1.f; +// ADLASSERT( h <= 1.f ); + if( h > maxHeight ) maxHeight = h; + } + + { + u8 quantizedHeight = min2((u8)(maxHeight*255.f)+1, 255); + sampleSupport( (Face)faceIdx, i, j ) = quantizedHeight; + } + } + } + + for(int i=0; i<6; i++) + { + m_faceAabbs[i].setEmpty(); + for(int j=0; jm_type == ADL_SHAPE_SPHERE ) + { + SphereShape* sphere = (SphereShape*)shape; + + m_scale = sphere->m_radius; + for(u32 faceIdx=0; faceIdx<6; faceIdx++) + { + for(int i=0; im_radius ); + m_aabb.m_min = make_float4( -sphere->m_radius ); + + m_aabb.expandBy( make_float4( m_collisionMargin ) ); + + for(int i=0; i<6; i++) + { + m_faceAabbs[i].setEmpty(); + for(int j=0; jgetVertexBuffer(), s->getTriangleBuffer(), s->getNumTris() ); + } +} +#endif + +ConvexHeightField::~ConvexHeightField() +{ + +} + +float ConvexHeightField::queryDistance(const float4& p ) const +{ + const float4 majorAxes[] = {make_float4(1,0,0,0), make_float4(0,1,0,0), make_float4(0,0,1,0)}; + + if( dot3F4( p, p ) >= m_scale*m_scale ) return FLT_MAX; + + int faceIdx; + float x, y; + CubeMapUtils::calcCrd( p, faceIdx, x, y ); + x = (x*HEIGHT_RES) - 0.5f; + y = (y*HEIGHT_RES) - 0.5f; + + float height; + { + int xi = (int)(x); + int yi = (int)(y); + float dx = x-xi; + float dy = y-yi; + + { + int xip = min2((int)(HEIGHT_RES-1), xi+1); + int yip = min2((int)(HEIGHT_RES-1), yi+1); + + u8 xy = sample( (Face)faceIdx, xi, yi ); + u8 xpy = sample( (Face)faceIdx, xip, yi ); + u8 xpyp = sample( (Face)faceIdx, xip, yip ); + u8 xyp = sample( (Face)faceIdx, xi, yip ); + + height = (xy*(1.f-dx)+xpy*dx)*(1.f-dy) + (xyp*(1.f-dx)+xpyp*dx)*dy; + height = height/255.f*m_scale; + + height = length3( p ) - height; + } + } + + return height; +} + +float ConvexHeightField::querySupportHeight(const float4& p ) const +{ + 
const float4 majorAxes[] = {make_float4(1,0,0,0), make_float4(0,1,0,0), make_float4(0,0,1,0)}; + +// if( dot3F4( p, p ) >= m_scale*m_scale ) return FLT_MAX; + + int faceIdx; + float x, y; + CubeMapUtils::calcCrd( p, faceIdx, x, y ); + x = (x*HEIGHT_RES) - 0.5f; + y = (y*HEIGHT_RES) - 0.5f; + + float height; + { + int xi = (int)(x); + int yi = (int)(y); + float dx = x-xi; + float dy = y-yi; + + { + int xip = min2((int)(HEIGHT_RES-1), xi+1); + int yip = min2((int)(HEIGHT_RES-1), yi+1); + + u8 xy = sampleSupport( (Face)faceIdx, xi, yi ); + u8 xpy = sampleSupport( (Face)faceIdx, xip, yi ); + u8 xpyp = sampleSupport( (Face)faceIdx, xip, yip ); + u8 xyp = sampleSupport( (Face)faceIdx, xi, yip ); + + height = max2( xy, max2( xpy, max2( xpyp, xyp ) ) ); + height = height/255.f*m_scale; + } + } + + return height; +} + +float ConvexHeightField::queryW(const float4& p ) const +{ + const float4 majorAxes[] = {make_float4(1,0,0,0), make_float4(0,1,0,0), make_float4(0,0,1,0)}; + + float value; + if( dot3F4( p, p ) >= m_scale*m_scale ) return 0; + + int faceIdx; + float x, y; + CubeMapUtils::calcCrd( p, faceIdx, x, y ); + x = (x*HEIGHT_RES) - 0.5f; + y = (y*HEIGHT_RES) - 0.5f; + + { + int xi = (int)(x); + int yi = (int)(y); + + value = sampleNormal( (Face)faceIdx, xi, yi ).w; + } + return value; +} + +bool ConvexHeightField::queryDistanceWithNormal( const float4& p, float4& normalOut ) const +{ + int faceIdx; + float x, y; + CubeMapUtils::calcCrd( p, faceIdx, x, y ); + x = (x*HEIGHT_RES) - 0.5f; + y = (y*HEIGHT_RES) - 0.5f; + + { + int xi = (int)(x); + int yi = (int)(y); + + normalOut = sampleNormal( (Face)faceIdx, xi, yi ); + } + return true; +} + +void ConvexHeightField::calcSamplePoints(float4* points) const +{ + for(u32 faceIdx=0; faceIdx<6; faceIdx++) + { + for(int i=0; ir2[0])? 1:0; + idx = (r2[2]>r2[idx])? 2:idx; + majorAxis = majorAxes[idx]; + + bool isNeg = dot3F4( p, majorAxis ) < 0.f; + + faceIdxOut = (idx*2+((isNeg)? 
0:1)); +//== + float4 abs = make_float4( fabs(p.x), fabs(p.y), fabs(p.z), 0.f ); + + float d; + if( idx == 0 ) + { + x = p.y; + y = p.z; + d = abs.x; + } + else if( idx == 1 ) + { + x = p.z; + y = p.x; + d = abs.y; + } + else + { + x = p.x; + y = p.y; + d = abs.z; + } + + float dInv = (d==0.f)? 0.f: (1.f/d); + x = (x*dInv+1.f)*0.5f; + y = (y*dInv+1.f)*0.5f; + } +} + +__inline +float4 CubeMapUtils::calcVector(int faceIdx, float x, float y) +{ + int dir = faceIdx/2; + float z = (faceIdx%2 == 0)? -1.f:1.f; + + x = x*2.f-1.f; + y = y*2.f-1.f; + + if( dir == 0 ) + { + return make_float4(z, x, y); + } + else if( dir == 1 ) + { + return make_float4(y,z,x); + } + else + { + return make_float4(x,y,z); + } +} + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomCollisionDispatcher.cpp b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomCollisionDispatcher.cpp new file mode 100644 index 000000000..762afd2cb --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomCollisionDispatcher.cpp @@ -0,0 +1,699 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Erwin Coumans + +#include "CustomCollisionDispatcher.h" +#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h" +#include "BulletCollision/CollisionDispatch/btCollisionObject.h" +#include "CustomConvexShape.h" +#include "CustomConvexPairCollision.h" +#include "LinearMath/btQuickprof.h" + + + +#ifdef CL_PLATFORM_AMD + +#include "Adl/Adl.h" +#include "Stubs/AdlMath.h" +#include "Stubs/AdlContact4.h" +#include "Stubs/AdlQuaternion.h" +#include "Stubs/ChNarrowPhase.h" + +#include "Stubs/Solver.h" + + +// State owned by the dispatcher on the OpenCL path: the CL device wrapper, a host device, +// and matching CPU/GPU buffers for broadphase pairs, contacts, rigid bodies and body info, +// plus the narrowphase and solver data. Allocated in the constructor, released in the destructor. +struct CustomDispatchData +{ + adl::DeviceCL* m_ddcl; + adl::Device* m_deviceHost; + ShapeDataType m_ShapeBuffer; + + adl::HostBuffer* m_pBufPairsCPU; + adl::Buffer* m_pBufPairsGPU; + adl::Buffer* m_pBufContactOutGPU; + adl::HostBuffer* m_pBufContactOutCPU; + adl::ChNarrowphase::Data* m_Data; + + adl::HostBuffer* m_pBufRBodiesCPU; + adl::Buffer* m_pBufRBodiesGPU; + + adl::Buffer* m_bodyInfoBufferCPU; + adl::Buffer* m_bodyInfoBufferGPU; + + adl::Solver::Data* m_solverDataGPU; + SolverData m_contactCGPU; + void* m_frictionCGPU; + + int m_numAcceleratedShapes; +}; +#endif //CL_PLATFORM_AMD + +// Builds the dispatcher. In CL_PLATFORM_AMD builds, when both a context and a command queue are +// supplied, m_internalData and all of its buffers are allocated; otherwise m_internalData stays 0 +// and dispatchAllCollisionPairs falls back to btCollisionDispatcher. +CustomCollisionDispatcher::CustomCollisionDispatcher(btCollisionConfiguration* collisionConfiguration +#ifdef CL_PLATFORM_AMD + , cl_context context,cl_device_id device,cl_command_queue queue +#endif //CL_PLATFORM_AMD +):btCollisionDispatcher(collisionConfiguration), +m_internalData(0) +{ +#ifdef CL_PLATFORM_AMD + + if (context && queue) + { + m_internalData = new CustomDispatchData(); + memset(m_internalData,0,sizeof(CustomDispatchData)); + + adl::DeviceUtils::Config cfg; + m_internalData->m_ddcl = new adl::DeviceCL(); + m_internalData->m_ddcl->m_deviceIdx = device; + m_internalData->m_ddcl->m_context = context; + m_internalData->m_ddcl->m_commandQueue = queue; + m_internalData->m_ddcl->m_kernelManager = new adl::KernelManager; + + + m_internalData->m_deviceHost = adl::DeviceUtils::allocate( adl::TYPE_HOST, cfg ); + 
m_internalData->m_pBufPairsCPU = new adl::HostBuffer(m_internalData->m_deviceHost, MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_pBufContactOutCPU = new adl::HostBuffer(m_internalData->m_deviceHost, MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_pBufRBodiesCPU = new adl::HostBuffer(m_internalData->m_deviceHost, MAX_CONVEX_BODIES_CL); + + m_internalData->m_bodyInfoBufferCPU = new adl::Buffer(m_internalData->m_deviceHost,MAX_CONVEX_BODIES_CL); + m_internalData->m_pBufContactOutGPU = new adl::Buffer(m_internalData->m_ddcl, MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_bodyInfoBufferGPU = new adl::Buffer(m_internalData->m_ddcl,MAX_CONVEX_BODIES_CL); + m_internalData->m_pBufPairsGPU = new adl::Buffer(m_internalData->m_ddcl, MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_solverDataGPU = adl::Solver::allocate( m_internalData->m_ddcl, MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_pBufRBodiesGPU = new adl::Buffer(m_internalData->m_ddcl, MAX_CONVEX_BODIES_CL); + m_internalData->m_Data = adl::ChNarrowphase::allocate(m_internalData->m_ddcl); + m_internalData->m_ShapeBuffer = adl::ChNarrowphase::allocateShapeBuffer(m_internalData->m_ddcl, MAX_CONVEX_SHAPES_CL); + m_internalData->m_numAcceleratedShapes = 0; + + m_internalData->m_contactCGPU = adl::Solver::allocateConstraint4( m_internalData->m_ddcl, MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_frictionCGPU = adl::Solver::allocateFrictionConstraint( m_internalData->m_ddcl, MAX_BROADPHASE_COLLISION_CL); + + } + + + +#endif //CL_PLATFORM_AMD +} + +// Releases everything the constructor allocated. Buffers are freed before the devices they were created on. +CustomCollisionDispatcher::~CustomCollisionDispatcher(void) +{ +#ifdef CL_PLATFORM_AMD + if (m_internalData) + { + delete m_internalData->m_pBufPairsCPU; + delete m_internalData->m_pBufPairsGPU; + delete m_internalData->m_pBufContactOutGPU; + delete m_internalData->m_pBufContactOutCPU; + + // the rigid-body and body-info buffers are allocated with new in the constructor and were never released + delete m_internalData->m_pBufRBodiesCPU; + delete m_internalData->m_pBufRBodiesGPU; + delete m_internalData->m_bodyInfoBufferCPU; + delete m_internalData->m_bodyInfoBufferGPU; + + adl::Solver::deallocateConstraint4( m_internalData->m_contactCGPU ); + adl::Solver::deallocateFrictionConstraint( m_internalData->m_frictionCGPU ); + + + 
adl::Solver::deallocate(m_internalData->m_solverDataGPU); + + // NOTE(review): m_Data, m_ShapeBuffer and m_ddcl->m_kernelManager are allocated in the constructor but not released here - confirm which Adl calls free them + adl::DeviceUtils::deallocate(m_internalData->m_deviceHost); + delete m_internalData->m_ddcl; + delete m_internalData; + } + +#endif //CL_PLATFORM_AMD + +} + + +#ifdef CL_PLATFORM_AMD +#include "BulletDynamics/Dynamics/btRigidBody.h" + +// Builds the inertia record for one Bullet object. Static/kinematic objects and objects that are +// not btRigidBody get zero inverse inertia; otherwise m_initInvInertia holds the local inverse-inertia +// diagonal and m_invInertia is that matrix rotated by the body orientation (m * I * m^T). +RigidBodyBase::Shape CreateBodyInfo(const btCollisionObject& colObj) +{ + RigidBodyBase::Shape shape; + const btRigidBody* bulletBody = btRigidBody::upcast(&colObj); + if( colObj.isStaticOrKinematicObject() || !bulletBody) + { + + //body.m_quat = qtGetIdentity(); + //body.m_invMass = 0.f; + shape.m_initInvInertia = mtZero(); + shape.m_invInertia = mtZero(); + } + else + { + + btVector3 invLocalInertia = bulletBody->getInvInertiaDiagLocal(); + shape.m_initInvInertia = mtZero(); + shape.m_initInvInertia.m_row[0].x = invLocalInertia.x(); + shape.m_initInvInertia.m_row[1].y = invLocalInertia.y(); + shape.m_initInvInertia.m_row[2].z = invLocalInertia.z(); + + btQuaternion q = colObj.getWorldTransform().getRotation(); + Quaternion qBody; + qBody.x = q.getX(); + qBody.y = q.getY(); + qBody.z = q.getZ(); + qBody.w = q.getW(); + + Matrix3x3 m = qtGetRotationMatrix( qBody); + Matrix3x3 mT = mtTranspose( m ); + shape.m_invInertia = mtMul( mtMul( m, shape.m_initInvInertia ), mT ); + //bulletBody->getInvInertiaTensorWorld(); + + + + + // shape.m_initInvInertia = mtInvert( localInertia ); + } + return shape; +} + +// Copies position, orientation, velocities, inverse mass, restitution and friction of one Bullet +// object into a RigidBodyBase::Body and tags it with shapeIdx. Static/kinematic objects and objects +// that are not btRigidBody get zero velocities and zero inverse mass. +RigidBodyBase::Body CreateRBodyCL(const btCollisionObject& colObj, int shapeIdx) +{ + RigidBodyBase::Body bodyCL; + + + // position + const btVector3& p = colObj.getWorldTransform().getOrigin(); + bodyCL.m_pos.x = p.getX(); + bodyCL.m_pos.y = p.getY(); + bodyCL.m_pos.z = p.getZ(); + bodyCL.m_pos.w = 0.0f; + + // quaternion + btQuaternion q = colObj.getWorldTransform().getRotation(); + bodyCL.m_quat.x = q.getX(); + bodyCL.m_quat.y = q.getY(); + bodyCL.m_quat.z = q.getZ(); + bodyCL.m_quat.w = q.getW(); + + const btRigidBody* bulletBody = btRigidBody::upcast(&colObj); + if( 
colObj.isStaticOrKinematicObject() || !bulletBody) + { + // linear velocity + bodyCL.m_linVel = make_float4(0.0f, 0.0f, 0.0f); + + // angular velocity + bodyCL.m_angVel = make_float4(0.0f, 0.0f, 0.0f); + bodyCL.m_invMass = 0.f; + } else + { + // linear velocity + const btVector3& lv = bulletBody->getLinearVelocity(); + const btVector3& av = bulletBody->getAngularVelocity(); + + bodyCL.m_linVel = make_float4(lv.x(),lv.y(),lv.z(),0.0f); + // angular velocity + bodyCL.m_angVel = make_float4(av.x(),av.y(),av.z(),0.0f); + bodyCL.m_invMass = bulletBody->getInvMass(); + } + // shape index + bodyCL.m_shapeIdx = shapeIdx; + + + // restitution coefficient + bodyCL.m_restituitionCoeff = colObj.getRestitution(); + + // friction coefficient + bodyCL.m_frictionCoeff = colObj.getFriction(); + + return bodyCL; +} +#endif //CL_PLATFORM_AMD + +// Dispatches all broadphase pairs. Without m_internalData (or without CL_PLATFORM_AMD) this simply +// forwards to btCollisionDispatcher. On the OpenCL path it refreshes existing manifolds, mirrors every +// pair and body into the Adl buffers, runs the narrowphase, and then either runs the Adl contact solver +// and writes the resulting velocities back to the btRigidBody objects (useSolver) or copies the contacts +// into btPersistentManifold objects. +void CustomCollisionDispatcher::dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher) +{ + BT_PROFILE("CustomCollisionDispatcher::dispatchAllCollisionPairs"); + { + btBroadphasePairArray& overlappingPairArray = pairCache->getOverlappingPairArray(); + bool bGPU = (m_internalData != 0); +#ifdef CL_PLATFORM_AMD + if ( !bGPU ) +#endif //CL_PLATFORM_AMD + { + BT_PROFILE("btCollisionDispatcher::dispatchAllCollisionPairs"); + btCollisionDispatcher::dispatchAllCollisionPairs(pairCache,dispatchInfo,dispatcher); + } +#ifdef CL_PLATFORM_AMD + + else + { + { + BT_PROFILE("refreshContactPoints"); + //---------------------------------------------------------------- + // GPU version of convex heightmap narrowphase collision detection + //---------------------------------------------------------------- + for ( int i = 0; i < getNumManifolds(); i++ ) + { + btPersistentManifold* manifold = getManifoldByIndexInternal(i); + + + btCollisionObject* body0 = (btCollisionObject*)manifold->getBody0(); + btCollisionObject* body1 = (btCollisionObject*)manifold->getBody1(); + + 
manifold->refreshContactPoints(body0->getWorldTransform(),body1->getWorldTransform()); + } + } + + // OpenCL + int nColPairsFromBP = overlappingPairArray.size(); + btAssert(MAX_BROADPHASE_COLLISION_CL >= nColPairsFromBP); + + int maxBodyIndex = -1; + + { + BT_PROFILE("CreateRBodyCL and GPU pairs"); + for ( int i=0; im_pProxy0->m_clientObject; + btCollisionObject* colObj1 = (btCollisionObject*)pair->m_pProxy1->m_clientObject; + + int bodyIndex0 = colObj0->getCompanionId(); + int bodyIndex1 = colObj1->getCompanionId(); + + //keep a one-to-one mapping between Bullet and Adl broadphase pairs + (*m_internalData->m_pBufPairsCPU)[i].x = bodyIndex0; + (*m_internalData->m_pBufPairsCPU)[i].y = bodyIndex1; + + if (bodyIndex0>=0 && bodyIndex1>=0) + { + //create companion shapes (if necessary) + + btAssert(colObj0->getCollisionShape()->getShapeType() == CUSTOM_POLYHEDRAL_SHAPE_TYPE); + btAssert(colObj1->getCollisionShape()->getShapeType() == CUSTOM_POLYHEDRAL_SHAPE_TYPE); + + CustomConvexShape* convexShape0 = (CustomConvexShape*)colObj0->getCollisionShape(); + CustomConvexShape* convexShape1 = (CustomConvexShape*)colObj1->getCollisionShape(); + + if (convexShape0->m_acceleratedCompanionShapeIndex<0) + { + convexShape0->m_acceleratedCompanionShapeIndex = m_internalData->m_numAcceleratedShapes; + adl::ChNarrowphase::setShape(m_internalData->m_ShapeBuffer, convexShape0->m_ConvexHeightField, convexShape0->m_acceleratedCompanionShapeIndex, 0.0f); + m_internalData->m_numAcceleratedShapes++; + } + if (convexShape1->m_acceleratedCompanionShapeIndex<0) + { + convexShape1->m_acceleratedCompanionShapeIndex = m_internalData->m_numAcceleratedShapes; + adl::ChNarrowphase::setShape(m_internalData->m_ShapeBuffer, convexShape1->m_ConvexHeightField, convexShape1->m_acceleratedCompanionShapeIndex, 0.0f); + m_internalData->m_numAcceleratedShapes++; + } + + btAssert(m_internalData->m_numAcceleratedShapesmaxBodyIndex) + maxBodyIndex = 
bodyIndex1; + + btAssert(maxBodyIndex=MAX_CONVEX_BODIES_CL) + { + printf("error: maxBodyIndex(%d)>MAX_CONVEX_BODIES_CL(%d)\n",maxBodyIndex,MAX_CONVEX_BODIES_CL); + } + + (*m_internalData->m_pBufRBodiesCPU)[bodyIndex0] = CreateRBodyCL(*colObj0, convexShape0->m_acceleratedCompanionShapeIndex); + m_internalData->m_bodyInfoBufferCPU->m_ptr[bodyIndex0] = CreateBodyInfo(*colObj0); + // body 1 must reference its own companion shape (convexShape1), not the shape of body 0 + (*m_internalData->m_pBufRBodiesCPU)[bodyIndex1] = CreateRBodyCL(*colObj1, convexShape1->m_acceleratedCompanionShapeIndex); + m_internalData->m_bodyInfoBufferCPU->m_ptr[bodyIndex1] = CreateBodyInfo(*colObj1); + } else + { + //TODO: dispatch using default dispatcher + btAssert(0); + } + } + } + + + if (maxBodyIndex>=0) + { + + int numOfConvexRBodies = maxBodyIndex+1; + + + + adl::ChNarrowphaseBase::Config cfgNP; + cfgNP.m_collisionMargin = 0.01f; + int nContactOut = 0; + + { + BT_PROFILE("ChNarrowphase::execute"); + // NOTE(review): the pair and body buffers are only uploaded to the GPU further below, after this call - confirm that running the narrowphase on previously uploaded data is intended + adl::ChNarrowphase::execute(m_internalData->m_Data, m_internalData->m_pBufPairsGPU, nColPairsFromBP, m_internalData->m_pBufRBodiesGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP); + adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl); + } + + + bool useCpu = false;//true; + bool useSolver = true;//true;//false; + + if (useSolver) + { + float dt=1./60.; + adl::SolverBase::ConstraintCfg csCfg( dt ); + csCfg.m_enableParallelSolve = true; + csCfg.m_averageExtent = 0.2f;//@TODO m_averageObjExtent; + csCfg.m_staticIdx = -1;//numOfConvexRBodies-1;//m_nBodies-1; + + + if (useCpu) + { + + { + BT_PROFILE("read m_pBufContactOutGPU"); + m_internalData->m_pBufContactOutGPU->read(m_internalData->m_pBufContactOutCPU->m_ptr, nContactOut);//MAX_BROADPHASE_COLLISION_CL); + adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl); + } + + BT_PROFILE("CPU stuff"); + adl::Solver::Data* solverData = adl::Solver::allocate( m_internalData->m_deviceHost, nContactOut); + + SolverData contactCPU = adl::Solver::allocateConstraint4( + 
m_internalData->m_deviceHost, + numOfConvexRBodies*MAX_PAIRS_PER_BODY_CL ); + + void* frictionCPU = adl::Solver::allocateFrictionConstraint( + m_internalData->m_deviceHost, + numOfConvexRBodies*MAX_PAIRS_PER_BODY_CL ); + + //write body with current linear/angular velocities to GPU + m_internalData->m_bodyInfoBufferGPU->write(m_internalData->m_bodyInfoBufferCPU->m_ptr,numOfConvexRBodies); + adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl); + + + if (nContactOut) + { + reorderConvertToConstraints2( + solverData, + m_internalData->m_pBufRBodiesCPU, + m_internalData->m_bodyInfoBufferCPU, + m_internalData->m_pBufContactOutCPU, + contactCPU, + frictionCPU, + nContactOut, + csCfg ); + + bool forceGPU = true; + + if (forceGPU) + { + + SolverData contactCPUcopy = adl::Solver::allocateConstraint4( + m_internalData->m_deviceHost, + numOfConvexRBodies*MAX_PAIRS_PER_BODY_CL ); + + adl::Solver::reorderConvertToConstraints( + m_internalData->m_solverDataGPU, + m_internalData->m_pBufRBodiesGPU, + m_internalData->m_bodyInfoBufferGPU, + m_internalData->m_pBufContactOutGPU, + m_internalData->m_contactCGPU, + m_internalData->m_frictionCGPU, + nContactOut, + csCfg ); + + adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl); + m_internalData->m_contactCGPU->read(contactCPUcopy->m_ptr,nContactOut); + adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl); + + // contactCPUcopy is only a read-back scratch target; release it so it is not leaked on every call + adl::Solver::deallocateConstraint4( contactCPUcopy ); + + + //m_internalData->m_contactCGPU->write(contactCPU->m_ptr,nContactOut); + adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl); + m_internalData->m_solverDataGPU->m_nIterations = 4; + + adl::Solver::solveContactConstraint( m_internalData->m_solverDataGPU, + m_internalData->m_pBufRBodiesGPU, + m_internalData->m_bodyInfoBufferGPU, + m_internalData->m_contactCGPU, + 0, + nContactOut ); + + adl::DeviceUtils::waitForCompletion( m_internalData->m_ddcl ); + + //read body updated linear/angular velocities back to CPU + m_internalData->m_pBufRBodiesGPU->read( + 
m_internalData->m_pBufRBodiesCPU->m_ptr,numOfConvexRBodies); + adl::DeviceUtils::waitForCompletion( m_internalData->m_ddcl ); + + } else + { + solverData->m_nIterations = 4; + adl::Solver::solveContactConstraint( solverData, + m_internalData->m_pBufRBodiesCPU, + m_internalData->m_bodyInfoBufferCPU, + contactCPU, + 0, + nContactOut ); + } + + + + } + + adl::Solver::deallocateConstraint4( contactCPU ); + adl::Solver::deallocateFrictionConstraint( frictionCPU ); + adl::Solver::deallocate( solverData ); + + + + } + else + { + + { + BT_PROFILE("rigid body data to GPU buffer"); + // Transfer rigid body data from CPU buffer to GPU buffer + m_internalData->m_pBufRBodiesGPU->write(m_internalData->m_pBufRBodiesCPU->m_ptr, numOfConvexRBodies); + m_internalData->m_pBufPairsGPU->write(m_internalData->m_pBufPairsCPU->m_ptr, MAX_BROADPHASE_COLLISION_CL); + //write body with current linear/angular velocities to GPU + m_internalData->m_bodyInfoBufferGPU->write(m_internalData->m_bodyInfoBufferCPU->m_ptr,numOfConvexRBodies); + adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl); + } + { + BT_PROFILE("GPU reorderConvertToConstraints"); + adl::Solver::reorderConvertToConstraints( + m_internalData->m_solverDataGPU, + m_internalData->m_pBufRBodiesGPU, + m_internalData->m_bodyInfoBufferGPU, + m_internalData->m_pBufContactOutGPU, + m_internalData->m_contactCGPU, + m_internalData->m_frictionCGPU, + nContactOut, + csCfg ); + } + + { + BT_PROFILE("GPU solveContactConstraint"); + m_internalData->m_solverDataGPU->m_nIterations = 4; + + adl::Solver::solveContactConstraint( m_internalData->m_solverDataGPU, + m_internalData->m_pBufRBodiesGPU, + m_internalData->m_bodyInfoBufferGPU, + m_internalData->m_contactCGPU, + 0, + nContactOut ); + + adl::DeviceUtils::waitForCompletion( m_internalData->m_ddcl ); + } + { + BT_PROFILE("read body velocities back to CPU"); + //read body updated linear/angular velocities back to CPU + m_internalData->m_pBufRBodiesGPU->read( + 
m_internalData->m_pBufRBodiesCPU->m_ptr,numOfConvexRBodies); + adl::DeviceUtils::waitForCompletion( m_internalData->m_ddcl ); + } + + + } + +#if 0 + if( !m_useGPUPipeline ) + { // CPU + BT_PROFILE("CPU solve"); + { + BT_PROFILE("CPU reorderConvertToConstraints"); + + SOLVER_CLASS::reorderConvertToConstraints( solver, m_bodyBuffer, m_bodyInfoBufferCPU, (Buffer*)m_contactBuffer, + contactC, frictionC, m_numContacts, csCfg ); + } + { + BT_PROFILE("CPU solveContactConstraint"); + + solver->m_nIterations = 4; + SOLVER_CLASS::solveContactConstraint( solver, m_bodyBuffer, m_bodyInfoBufferCPU, contactC, 0, m_numContacts ); + } + } + else + { + BT_PROFILE("GPU solve"); + { // GPU using host buffers + { + BT_PROFILE("GPU reorderConvertToConstraints"); + + Solver::reorderConvertToConstraints( m_solver, m_bodyBuffer, m_bodyInfoBufferCPU, (Buffer*)m_contactBuffer, + contactC, frictionC, m_numContacts, csCfg ); + } + timerEnd(); + + timerStart(0); + //for(int iter=0; iter<4; iter++) + { + BT_PROFILE("GPU solveContactConstraint"); + + Solver::solveContactConstraint( m_solver, m_bodyBuffer, m_bodyInfoBufferCPU, contactC, frictionC, m_numContacts ); + } + DeviceUtils::waitForCompletion( m_device ); + } + } + timerEnd(); +#endif + + + } + + //if we ran the solver, it will overwrite the batchIdx so we cannot write back the results + //try to make it work by writing velocity back to rigid body + + if (useSolver) + { + + BT_PROFILE("writing velocity back to btRigidBody"); + + for ( int i=0; im_pProxy0->m_clientObject; + btCollisionObject* colObj1 = (btCollisionObject*)pair->m_pProxy1->m_clientObject; + + int bodyIndex0 = colObj0->getCompanionId(); + int bodyIndex1 = colObj1->getCompanionId(); + + RigidBodyBase::Body* bA = &m_internalData->m_pBufRBodiesCPU->m_ptr[bodyIndex0]; + RigidBodyBase::Body* bB = &m_internalData->m_pBufRBodiesCPU->m_ptr[bodyIndex1]; + btRigidBody* bodyA = btRigidBody::upcast(colObj0); + if (bodyA && !bodyA->isStaticOrKinematicObject()) + { + 
bodyA->setLinearVelocity(btVector3( + bA->m_linVel.x, + bA->m_linVel.y, + bA->m_linVel.z)); + + bodyA->setAngularVelocity(btVector3( + bA->m_angVel.x, + bA->m_angVel.y, + bA->m_angVel.z)); + } + btRigidBody* bodyB = btRigidBody::upcast(colObj1); + if (bodyB && !bodyB->isStaticOrKinematicObject()) + { + bodyB->setLinearVelocity(btVector3( + bB->m_linVel.x, + bB->m_linVel.y, + bB->m_linVel.z)); + bodyB->setAngularVelocity(btVector3( + bB->m_angVel.x, + bB->m_angVel.y, + bB->m_angVel.z)); + + } + + + + + } + } else + { + BT_PROFILE("copy Contact4 to btPersistentManifold"); + // Now we got the narrowphase info from GPU and need to update rigid bodies with the info and go back to the original pipeline in Bullet physics. + for ( int i = 0; i < nContactOut; i++ ) + { + Contact4 contact = (*m_internalData->m_pBufContactOutCPU)[i]; + + int idxBodyA = contact.m_bodyAPtr; + int idxBodyB = contact.m_bodyBPtr; + + btAssert(contact.m_batchIdx>=0); + btAssert(contact.m_batchIdxm_pProxy0->m_clientObject; + btCollisionObject* colObj1 = (btCollisionObject*)pair->m_pProxy1->m_clientObject; + + if (!pair->m_algorithm) + { + pair->m_algorithm = findAlgorithm(colObj0,colObj1,0); + } + + btManifoldResult contactPointResult(colObj0, colObj1); + + + CustomConvexConvexPairCollision* pairAlgo = (CustomConvexConvexPairCollision*) pair->m_algorithm; + + if (!pairAlgo->getManifoldPtr()) + { + pairAlgo->createManifoldPtr(colObj0,colObj1,dispatchInfo); + } + + contactPointResult.setPersistentManifold(pairAlgo->getManifoldPtr()); + + contactPointResult.getPersistentManifold()->refreshContactPoints(colObj0->getWorldTransform(),colObj1->getWorldTransform()); + + const btTransform& transA = colObj0->getWorldTransform(); + const btTransform& transB = colObj1->getWorldTransform(); + + int numPoints = contact.getNPoints(); + + for ( int k=0; k < numPoints; k++ ) + { + btVector3 normalOnBInWorld( + contact.m_worldNormal.x, + contact.m_worldNormal.y, + contact.m_worldNormal.z); + btVector3 
pointInWorldOnB( + contact.m_worldPos[k].x, + contact.m_worldPos[k].y, + contact.m_worldPos[k].z); + + btScalar depth = contact.m_worldPos[k].w; + + if (depth<0) + { + const btVector3 deltaC = transB.getOrigin() - transA.getOrigin(); + + normalOnBInWorld.normalize(); + + if((deltaC.dot(normalOnBInWorld))>0.0f) + { + normalOnBInWorld= -normalOnBInWorld; + + contactPointResult.addContactPoint(normalOnBInWorld, pointInWorldOnB, depth); + } + else + { + contactPointResult.addContactPoint(normalOnBInWorld, pointInWorldOnB-normalOnBInWorld*depth, depth); + } + } + } + } + } + } + } +#endif //CL_PLATFORM_AMD + } + +} + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomCollisionDispatcher.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomCollisionDispatcher.h new file mode 100644 index 000000000..315a4ba48 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomCollisionDispatcher.h @@ -0,0 +1,70 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + + +#ifndef CUSTOM_COLLISION_DISPATCHER_H +#define CUSTOM_COLLISION_DISPATCHER_H + + +#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h" +#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" + + +#define MAX_CONVEX_BODIES_CL 64*1024 +#define MAX_PAIRS_PER_BODY_CL 32 +#define MAX_CONVEX_SHAPES_CL 8192 +#define MAX_BROADPHASE_COLLISION_CL (MAX_CONVEX_BODIES_CL*MAX_PAIRS_PER_BODY_CL) + + + +struct CustomDispatchData; + +#ifdef CL_PLATFORM_AMD +#ifdef __APPLE__ + #ifdef USE_MINICL + #include + #else + #include + #endif +#else //__APPLE__ + #ifdef USE_MINICL + #include + #else + #include + #endif +#endif //__APPLE__ +#endif + +class CustomCollisionDispatcher : public btCollisionDispatcher +{ +public: + CustomCollisionDispatcher (btCollisionConfiguration* collisionConfiguration +#ifdef CL_PLATFORM_AMD + , cl_context context = NULL,cl_device_id device = NULL,cl_command_queue queue = NULL +#endif //CL_PLATFORM_AMD + ); + + virtual ~CustomCollisionDispatcher(void); + +protected: + + CustomDispatchData* m_internalData; + + btBroadphasePair* GetPair(btBroadphasePairArray& pairArray, int idxBodyA, int idxBodyB); + +public: + virtual void dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher); +}; + +#endif //CUSTOM_COLLISION_DISPATCHER_H diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexPairCollision.cpp b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexPairCollision.cpp new file mode 100644 index 000000000..d0fd32c31 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexPairCollision.cpp @@ -0,0 +1,409 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#include "CustomConvexPairCollision.h" +#include "ConvexHeightFieldShape.h" +#include "CustomConvexShape.h" +#include "BulletCollision/CollisionDispatch/btCollisionObject.h" +#include "Stubs/AdlContact4.h" +#include "Stubs/AdlTransform.h" + + +// Forwards every argument unchanged to btConvexConvexAlgorithm; no extra state is set up here. +CustomConvexConvexPairCollision::CustomConvexConvexPairCollision(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1, btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver, int numPerturbationIterations, int minimumPointsPerturbationThreshold) +:btConvexConvexAlgorithm(mf,ci,body0,body1,simplexSolver,pdSolver,numPerturbationIterations, minimumPointsPerturbationThreshold) +{ + +} + +CustomConvexConvexPairCollision::~CustomConvexConvexPairCollision() +{ + +} + + +#include + +// Adds value to *ptr through InterlockedExchangeAdd and returns that call's result cast to T. +// NOTE(review): ptr is declared const but is written through the LONG* cast - confirm callers never pass read-only storage. +template +T atomAdd(const T* ptr, int value) +{ + return (T)InterlockedExchangeAdd((LONG*)ptr, value); +} + + + +#define PARALLEL_SUM(v, n) for(int j=1; j v[i+offset].y)? v[i]: v[i+offset]; } +#define REDUCE_MIN(v, n) {int i=0;\ + for(int offset=0; offset a[ie].x )? a[0].x: a[ie].x; + a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y; + a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z; + a[0].w = (a[0].w > a[ie].w )? 
a[0].w: a[ie].w; + } + + idx[0] = (int)a[0].x & 0xff; + idx[1] = (int)a[0].y & 0xff; + idx[2] = (int)a[0].z & 0xff; + idx[3] = (int)a[0].w & 0xff; + } + } + + { + float2 h[64]; + PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints ); + REDUCE_MIN( h, nPoints ); + max00 = h[0]; + } + } + + contactIdx[0] = idx[0]; + contactIdx[1] = idx[1]; + contactIdx[2] = idx[2]; + contactIdx[3] = idx[3]; + +// if( max00.y < 0.0f ) +// contactIdx[0] = (int)max00.x; + + std::sort( contactIdx, contactIdx+4 ); + + return 4; + } +} + +// The helper macros are local to the manifold extraction above; undefine all four so none leaks further. +#undef PARALLEL_SUM +#undef PARALLEL_DO +#undef REDUCE_MAX +#undef REDUCE_MIN + +// Collides the sample points of shapeB against shapeA in A's local frame. Points closer than +// collisionMargin are collected, reduced by extractManifold, and written as one ContactPoint4 at the +// slot reserved with atomAdd on numContacts (only while writeIdx+1 < contactCapacity). +int collideStraight(const ConvexHeightField* shapeA,const ConvexHeightField* shapeB, + const float4& bodyApos, Quaternion& bodyAquat,const float4& bodyBpos,const Quaternion& bodyBquat, + ContactPoint4* contactsOut, int& numContacts, int contactCapacity, + float collisionMargin ) +{ +// Stopwatch sw; + + Transform trA; + trA = trSetTransform(bodyApos,bodyAquat); + Transform trB; + trB = trSetTransform(bodyBpos, bodyBquat); + + Transform B2A; + { + Transform invTrA = trInvert( trA ); + B2A = trMul( invTrA, trB ); + } + + int nContacts = 0; + { // testB against A + float4 p[ConvexHeightField::HEIGHT_RES*ConvexHeightField::HEIGHT_RES*6]; + int nHits = 0; + + const float4* pInB = shapeB->getSamplePoints(); + + float4 baInB = qtInvRotate( bodyBquat, bodyApos - bodyBpos ); + if( shapeA->m_type == CollisionShape::SHAPE_HEIGHT_FIELD ) + baInB = make_float4(0,0,0,0); + +// sw.start(); + for(int iface=0; iface<6; iface++) + { + Aabb aabb = shapeB->m_faceAabbs[iface]; + + aabb.transform( B2A.m_translation, B2A.m_rotation ); + + if( !shapeA->m_aabb.overlaps( aabb ) ) continue; + + for(int ip=0; ipm_aabb.overlaps( pInA ) ) + { +// Stopwatch sw1; +// sw1.start(); + float dist = shapeA->queryDistance( pInA ); +// sw1.stop(); +// m_times[TIME_SAMPLE] += sw1.getMs(); + + if( dist < collisionMargin ) + { + p[nHits] = make_float4(pInA.x, pInA.y, pInA.z, dist); + nHits++; + } + } + } + 
} +// sw.stop(); +// m_times[TIME_TEST] += sw.getMs(); + +// sw.start(); + if( nHits ) + { + float4 ab = bodyBpos - bodyApos; + ab = qtInvRotate( bodyAquat, ab ); + if( shapeA->m_type == CollisionShape::SHAPE_HEIGHT_FIELD ) + { + //todo. sample normal from height field but just fake here + ab = make_float4(0,1,0,0); + } + + int cIdx[4]; + float4 center; + + nContacts = extractManifold( p, nHits, ab, center, cIdx ); + + float4 contactNormal; + { + shapeA->queryDistanceWithNormal( center, contactNormal ); + contactNormal = normalize3( contactNormal ); + +// u32 cmp = u8vCompress( contactNormal ); +// contactNormal = make_float4( u8vGetX(cmp), u8vGetY(cmp), u8vGetZ(cmp), 0 ); + } + + int writeIdx = atomAdd( &numContacts, 1 ); + if( writeIdx+1 < contactCapacity ) + { + ContactPoint4& c = contactsOut[writeIdx]; + nContacts = min2( nContacts, 4 ); + for(int i=0; igetNewManifold(body0,body1); + m_ownManifold = true; +} + + +// Runs the height-field convex test in both directions (A against B, then B against A) and feeds +// the resulting contact points into resultOut. The #else branch keeps the stock +// btConvexConvexAlgorithm path available for comparison. +void CustomConvexConvexPairCollision::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut) +{ +#if 1 + if (!m_manifoldPtr) + { + //swapped? 
+ m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1); + m_ownManifold = true; + } + resultOut->setPersistentManifold(m_manifoldPtr); + + + CustomConvexShape* convex0 = (CustomConvexShape*)body0->getCollisionShape(); + CustomConvexShape* convex1 = (CustomConvexShape*)body1->getCollisionShape(); + + + float4 bodyApos; + float4 bodyBpos; + Quaternion bodyAquat; + Quaternion bodyBquat; + + const btTransform& transA = body0->getWorldTransform(); + const btTransform& transB = body1->getWorldTransform(); + + const btVector3& pA = body0->getWorldTransform().getOrigin(); + const btVector3& pB = body1->getWorldTransform().getOrigin(); + + btQuaternion qA = body0->getWorldTransform().getRotation(); + btQuaternion qB = body1->getWorldTransform().getRotation(); + + bodyApos.x = pA.getX(); + bodyApos.y = pA.getY(); + bodyApos.z = pA.getZ(); + bodyApos.w = 0.f; + + bodyBpos.x = pB.getX(); + bodyBpos.y = pB.getY(); + bodyBpos.z = pB.getZ(); + bodyBpos.w = 0.f; + + bodyAquat.x = qA.getX(); + bodyAquat.y = qA.getY(); + bodyAquat.z = qA.getZ(); + bodyAquat.w = qA.getW(); + + bodyBquat.x = qB.getX(); + bodyBquat.y = qB.getY(); + bodyBquat.z = qB.getZ(); + bodyBquat.w = qB.getW(); + + +#define CAPACITY_CONTACTS 4 + + ContactPoint4 contactsOut[CAPACITY_CONTACTS]; + int freeContactIndex = 0; + int contactCapacity = CAPACITY_CONTACTS; + float collisionMargin = 0.001f; + + m_manifoldPtr->refreshContactPoints(body0->getWorldTransform(),body1->getWorldTransform()); + + collideStraight(convex0->m_ConvexHeightField,convex1->m_ConvexHeightField, + bodyApos, bodyAquat,bodyBpos,bodyBquat, + contactsOut, freeContactIndex, contactCapacity, + collisionMargin ); + collideStraight(convex1->m_ConvexHeightField,convex0->m_ConvexHeightField, + bodyBpos, bodyBquat,bodyApos,bodyAquat, + contactsOut, freeContactIndex, contactCapacity, + collisionMargin ); + + //copy points into manifold + //refresh manifold + + btAssert(freeContactIndex<3); + for (int j=0;j0.0f) + { + normalOnBInWorld= 
-normalOnBInWorld; + } + normalOnBInWorld.normalize(); + if (j) + { + resultOut->addContactPoint(normalOnBInWorld, pointInWorldOnB, depth); + } else + { + resultOut->addContactPoint(normalOnBInWorld, pointInWorldOnB-normalOnBInWorld*depth, depth); + } + } + } + } +#else + btConvexConvexAlgorithm::processCollision(body0,body1,dispatchInfo,resultOut); +#endif +} + + + +CustomConvexConvexPairCollision::CreateFunc::CreateFunc(btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver) +:btConvexConvexAlgorithm::CreateFunc(simplexSolver,pdSolver) +{ +} + +CustomConvexConvexPairCollision::CreateFunc::~CreateFunc() +{ + +} \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexPairCollision.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexPairCollision.h new file mode 100644 index 000000000..bd3a085ca --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexPairCollision.h @@ -0,0 +1,56 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Erwin Coumans + +#ifndef CUSTOM_CONVEX_CONVEX_PAIR_COLLISION_H +#define CUSTOM_CONVEX_CONVEX_PAIR_COLLISION_H + + +#include "BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h" + +class CustomConvexConvexPairCollision : public btConvexConvexAlgorithm +{ + public: + + CustomConvexConvexPairCollision(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1, btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver, int numPerturbationIterations, int minimumPointsPerturbationThreshold); + virtual ~CustomConvexConvexPairCollision(); + + virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut); + + btPersistentManifold* getManifoldPtr() + { + return m_manifoldPtr; + } + + void createManifoldPtr(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo); + + struct CreateFunc :public btConvexConvexAlgorithm::CreateFunc + { + + CreateFunc(btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver); + + virtual ~CreateFunc(); + + virtual btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1) + { + void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(CustomConvexConvexPairCollision)); + return new(mem) CustomConvexConvexPairCollision(ci.m_manifold,ci,body0,body1,m_simplexSolver,m_pdSolver,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold); + } + }; + + +}; + + +#endif //CUSTOM_CONVEX_CONVEX_PAIR_COLLISION_H diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexShape.cpp b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexShape.cpp new file mode 100644 index 000000000..de7d74dfa --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexShape.cpp @@ 
-0,0 +1,45 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#include "CustomConvexShape.h" +#include "ConvexHeightFieldShape.h" +#include "BulletCollision/CollisionShapes/btConvexPolyhedron.h" + + +CustomConvexShape::CustomConvexShape(const btScalar* points,int numPoints, int stride) +:btConvexHullShape(points,numPoints,stride), +m_acceleratedCompanionShapeIndex(-1) +{ + m_shapeType = CUSTOM_POLYHEDRAL_SHAPE_TYPE; + + initializePolyhedralFeatures(); + int numFaces= m_polyhedron->m_faces.size(); + float4* eqn = new float4[numFaces]; + for (int i=0;im_faces[i].m_plane[0]; + eqn[i].y = m_polyhedron->m_faces[i].m_plane[1]; + eqn[i].z = m_polyhedron->m_faces[i].m_plane[2]; + eqn[i].w = m_polyhedron->m_faces[i].m_plane[3]; + } + + m_ConvexHeightField = new ConvexHeightField(eqn,numFaces); + +} + +CustomConvexShape::~CustomConvexShape() +{ + delete m_ConvexHeightField; +} \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexShape.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexShape.h new file mode 100644 index 000000000..a514c94e8 --- /dev/null +++ 
b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/CustomConvexShape.h @@ -0,0 +1,35 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#ifndef CUSTOM_CONVEX_SHAPE_H +#define CUSTOM_CONVEX_SHAPE_H + +#include "BulletCollision/CollisionShapes/btConvexHullShape.h" + +class CustomConvexShape : public btConvexHullShape +{ + public: + + class ConvexHeightField* m_ConvexHeightField; + + int m_acceleratedCompanionShapeIndex; + + CustomConvexShape(const btScalar* points,int numPoints,int stride); + virtual ~CustomConvexShape(); + +}; + +#endif //CUSTOM_CONVEX_SHAPE_H + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlAabb.cpp b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlAabb.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlAabb.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlAabb.h new file mode 100644 index 000000000..0c6709020 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlAabb.h @@ -0,0 +1,230 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. 
+ +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + +#ifndef AABB_H +#define AABB_H + +#include "Stubs/AdlMath.h" +#include "Stubs/AdlQuaternion.h" + +enum AdlCollisionShapeTypes +{ + ADL_SHAPE_SPHERE=2, + ADL_SHAPE_HEIGHT_FIELD, + SHAPE_CONVEX_HEIGHT_FIELD, +}; + +_MEM_CLASSALIGN16 +struct Aabb +{ + public: + _MEM_ALIGNED_ALLOCATOR16; + + __inline + void setEmpty(); + __inline + void includeVolume( const Aabb& aabb ); + __inline + void includePoint( const float4& p ); + __inline + bool overlaps( const float4& p ) const; + __inline + bool overlaps( const Aabb& aabb ) const; + __inline + float4 center() const; + __inline + int getMajorAxis() const; + __inline + float4 getExtent() const; + __inline + void expandBy( const float4& r ); + + __inline + static bool overlaps( const Aabb& a, const Aabb& b ); + + __inline + bool intersect(const float4* from, const float4* to, const float4* invRay) const; + + __inline + void transform(const float4& translation, const Quaternion& quat); + + __inline + void transform(const float4& translation, const Matrix3x3& rot); + + public: + float4 m_max; + float4 m_min; +}; + +void Aabb::setEmpty() +{ + m_max = make_float4( -FLT_MAX ); + m_min 
= make_float4( FLT_MAX ); +} + +void Aabb::includeVolume(const Aabb& aabb) +{ + m_max.x = max2( m_max.x, aabb.m_max.x ); + m_min.x = min2( m_min.x, aabb.m_min.x ); + + m_max.y = max2( m_max.y, aabb.m_max.y ); + m_min.y = min2( m_min.y, aabb.m_min.y ); + + m_max.z = max2( m_max.z, aabb.m_max.z ); + m_min.z = min2( m_min.z, aabb.m_min.z ); +} + +void Aabb::includePoint( const float4& p ) +{ + m_max.x = max2( m_max.x, p.x ); + m_min.x = min2( m_min.x, p.x ); + + m_max.y = max2( m_max.y, p.y ); + m_min.y = min2( m_min.y, p.y ); + + m_max.z = max2( m_max.z, p.z ); + m_min.z = min2( m_min.z, p.z ); +} + +bool Aabb::overlaps( const float4& p ) const +{ + float4 dx = m_max-p; + float4 dm = p-m_min; + + return (dx.x >= 0 && dx.y >= 0 && dx.z >= 0) + && (dm.x >= 0 && dm.y >= 0 && dm.z >= 0); +} + +bool Aabb::overlaps( const Aabb& in ) const +{ +/* + if( m_max.x < in.m_min.x || m_min.x > in.m_max.x ) return false; + if( m_max.y < in.m_min.y || m_min.y > in.m_max.y ) return false; + if( m_max.z < in.m_min.z || m_min.z > in.m_max.z ) return false; + + return true; +*/ + return overlaps( *this, in ); +} + +bool Aabb::overlaps( const Aabb& a, const Aabb& b ) +{ + if( a.m_max.x < b.m_min.x || a.m_min.x > b.m_max.x ) return false; + if( a.m_max.y < b.m_min.y || a.m_min.y > b.m_max.y ) return false; + if( a.m_max.z < b.m_min.z || a.m_min.z > b.m_max.z ) return false; + + return true; +} + +float4 Aabb::center() const +{ + return 0.5f*(m_max+m_min); +} + +int Aabb::getMajorAxis() const +{ + float4 extent = getExtent(); + + int majorAxis = 0; + if( extent.s[1] > extent.s[0] ) + majorAxis = 1; + if( extent.s[2] > extent.s[majorAxis] ) + majorAxis = 2; + + return majorAxis; +} + +float4 Aabb::getExtent() const +{ + return m_max-m_min; +} + +void Aabb::expandBy( const float4& r ) +{ + m_max += r; + m_min -= r; +} + +bool Aabb::intersect(const float4* from, const float4* to, const float4* invRay) const +{ + float4 dFar; + dFar = (m_max - *from); + dFar *= *invRay; + float4 dNear; + dNear 
= (m_min - *from); + dNear *= *invRay; + + float4 tFar; + tFar = max2(dFar, dNear); + float4 tNear; + tNear = min2(dFar, dNear); + + float farf[] = { tFar.x, tFar.y, tFar.z }; + + float nearf[] = { tNear.x, tNear.y, tNear.z }; + + float minFar = min2(farf[0], min2(farf[1], farf[2])); + float maxNear = max2(nearf[0], max2(nearf[1], nearf[2])); + + minFar = min2(1.0f, minFar ); + maxNear = max2(0.0f, maxNear); + + return (minFar >= maxNear); +} + +void Aabb::transform(const float4& translation, const Matrix3x3& m) +{ + float4 c = center(); + + Aabb& ans = *this; + + float4 e[] = { m.m_row[0]*m_min, m.m_row[1]*m_min, m.m_row[2]*m_min }; + float4 f[] = { m.m_row[0]*m_max, m.m_row[1]*m_max, m.m_row[2]*m_max }; + ans.m_max = ans.m_min = translation; + + { int j=0; + float4 mi = make_float4( min2( e[j].x, f[j].x ), min2( e[j].y, f[j].y ), min2( e[j].z, f[j].z ) ); + float4 ma = make_float4( max2( e[j].x, f[j].x ), max2( e[j].y, f[j].y ), max2( e[j].z, f[j].z ) ); + + ans.m_min.x += mi.x+mi.y+mi.z; + ans.m_max.x += ma.x+ma.y+ma.z; + } + + { int j=1; + float4 mi = make_float4( min2( e[j].x, f[j].x ), min2( e[j].y, f[j].y ), min2( e[j].z, f[j].z ) ); + float4 ma = make_float4( max2( e[j].x, f[j].x ), max2( e[j].y, f[j].y ), max2( e[j].z, f[j].z ) ); + + ans.m_min.y += mi.x+mi.y+mi.z; + ans.m_max.y += ma.x+ma.y+ma.z; + } + + { int j=2; + float4 mi = make_float4( min2( e[j].x, f[j].x ), min2( e[j].y, f[j].y ), min2( e[j].z, f[j].z ) ); + float4 ma = make_float4( max2( e[j].x, f[j].x ), max2( e[j].y, f[j].y ), max2( e[j].z, f[j].z ) ); + + ans.m_min.z += mi.x+mi.y+mi.z; + ans.m_max.z += ma.x+ma.y+ma.z; + } +} + +void Aabb::transform(const float4& translation, const Quaternion& quat) +{ + Matrix3x3 m = qtGetRotationMatrix( quat ); + + transform( translation, m ); +} + +#endif + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlArray.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlArray.h new file mode 100644 index 000000000..e7fe5e3d1 --- 
/dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlArray.h @@ -0,0 +1,212 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef ARRAY_H +#define ARRAY_H + +#include +#include +#include +#include + + +template +class Array +{ + public: + __inline + Array(); + __inline + Array(int size); + __inline + ~Array(); + __inline + T& operator[] (int idx); + __inline + const T& operator[] (int idx) const; + __inline + void pushBack(const T& elem); + __inline + void popBack(); + __inline + void clear(); + __inline + void setSize(int size); + __inline + int getSize() const; + __inline + T* begin(); + __inline + const T* begin() const; + __inline + int indexOf(const T& data) const; + __inline + void removeAt(int idx); + __inline + T& expandOne(); + + private: + Array(const Array& a){} + + private: + enum + { + DEFAULT_SIZE = 128, + INCREASE_SIZE = 128, + }; + + T* m_data; + int m_size; + int m_capacity; +}; + +template +Array::Array() +{ + m_size = 0; + m_capacity = DEFAULT_SIZE; +// m_data = new T[ m_capacity ]; + m_data = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16); + for(int i=0; i 
+Array::Array(int size) +{ + m_size = size; + m_capacity = size; +// m_data = new T[ m_capacity ]; + m_data = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16); + for(int i=0; i +Array::~Array() +{ + if( m_data ) + { +// delete [] m_data; + _aligned_free( m_data ); + m_data = NULL; + } +} + +template +T& Array::operator[](int idx) +{ + CLASSERT(idx +const T& Array::operator[](int idx) const +{ + CLASSERT(idx +void Array::pushBack(const T& elem) +{ + if( m_size == m_capacity ) + { + int oldCap = m_capacity; + m_capacity += INCREASE_SIZE; +// T* s = new T[m_capacity]; + T* s = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16); + memcpy( s, m_data, sizeof(T)*oldCap ); +// delete [] m_data; + _aligned_free( m_data ); + m_data = s; + } + m_data[ m_size++ ] = elem; +} + +template +void Array::popBack() +{ + CLASSERT( m_size>0 ); + m_size--; +} + +template +void Array::clear() +{ + m_size = 0; +} + +template +void Array::setSize(int size) +{ + if( size > m_capacity ) + { + int oldCap = m_capacity; + m_capacity = size; +// T* s = new T[m_capacity]; + T* s = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16); + for(int i=0; i +int Array::getSize() const +{ + return m_size; +} + +template +const T* Array::begin() const +{ + return m_data; +} + +template +T* Array::begin() +{ + return m_data; +} + +template +int Array::indexOf(const T& data) const +{ + for(int i=0; i +void Array::removeAt(int idx) +{ + CLASSERT(idx +T& Array::expandOne() +{ + setSize( m_size+1 ); + return m_data[ m_size-1 ]; +} + +#endif + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlCollideUtils.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlCollideUtils.h new file mode 100644 index 000000000..84d658318 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlCollideUtils.h @@ -0,0 +1,111 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. 
+In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef COLLIDE_UTILS_H +#define COLLIDE_UTILS_H + +#include "Stubs/AdlMath.h" + + +class CollideUtils +{ + public: + template + static bool collide(const float4& a, const float4& b, const float4& c, const float4& p, float4& normalOut, float margin = 0.f); + + __inline + static float castRay(const float4& v0, const float4& v1, const float4& v2, + const float4& rayFrom, const float4& rayTo, float margin = 0.0f, float4* bCrdOut = NULL); + +}; + + +template +bool CollideUtils::collide(const float4& a, const float4& b, const float4& c, const float4& p, float4& normalOut, float margin) +{ + float4 ab, bc, ca; + ab = b-a; + bc = c-b; + ca = a-c; + + float4 ap, bp, cp; + ap = p-a; + bp = p-b; + cp = p-c; + + float4 n; + n = cross3(ab, -1.f*ca); + + float4 abp = cross3( ab, ap ); + float4 bcp = cross3( bc, bp ); + float4 cap = cross3( ca, cp ); + + float s0 = dot3F4(n,abp); + float s1 = dot3F4(n,bcp); + float s2 = dot3F4(n,cap); + +// if(( s0<0.f && s1<0.f && s2<0.f ) || ( s0>0.f && s1>0.f && s2>0.f )) + if(( s0-margin && s1>-margin && s2>-margin )) + { + n = normalize3( n ); + n.w = dot3F4(n,ap); + + normalOut = (FLIPSIGN)? 
-n : n; + return true; + } + + return false; +} + +__inline +float CollideUtils::castRay(const float4& v0, const float4& v1, const float4& v2, + const float4& rayFrom, const float4& rayTo, float margin, float4* bCrdOut) +{ + float t, v, w; + float4 ab; ab = v1 - v0; + float4 ac; ac = v2 - v0; + float4 qp; qp = rayFrom - rayTo; + float4 normal = cross3( ab, ac ); + float d = dot3F4( qp, normal ); + float odd = 1.f/d; + float4 ap; ap = rayFrom - v0; + t = dot3F4( ap, normal ); + t *= odd; +// if( t < 0.f || t > 1.f ) return -1; + + float4 e = cross3( qp, ap ); + v = dot3F4( ac, e ); + v *= odd; + if( v < -margin || v > 1.f+margin ) return -1; + w = -dot3F4( ab, e ); + w *= odd; +// if( w < 0.f || w > 1.f ) return -1; + if( w < -margin || w > 1.f+margin ) return -1; + + float u = 1.f-v-w; + if( u < -margin || u > 1.f+margin ) return -1; + + if( bCrdOut ) + { + bCrdOut->x = u; + bCrdOut->y = v; + bCrdOut->z = w; + } + return t; +} + +#endif + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlCollisionShape.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlCollisionShape.h new file mode 100644 index 000000000..834c88c94 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlCollisionShape.h @@ -0,0 +1,49 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef COLLISION_SHAPE_H +#define COLLISION_SHAPE_H + +#include "Stubs/AdlMath.h" +#include "Stubs/AdlAabb.h" + + +_MEM_CLASSALIGN16 +class CollisionShape +{ + public: + _MEM_ALIGNED_ALLOCATOR16; + + enum Type + { + SHAPE_HEIGHT_FIELD, + SHAPE_CONVEX_HEIGHT_FIELD, + SHAPE_PLANE, + MAX_NUM_SHAPE_TYPES, + }; + + CollisionShape( Type type, float collisionMargin = 0.0025f ) : m_type( type ){ m_collisionMargin = collisionMargin; } + virtual ~CollisionShape(){} + virtual float queryDistance(const float4& p) const = 0; + virtual bool queryDistanceWithNormal(const float4& p, float4& normalOut) const = 0; + + public: + Type m_type; + Aabb m_aabb; + float m_collisionMargin; +}; + +#endif diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlConstraint4.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlConstraint4.h new file mode 100644 index 000000000..8f5078122 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlConstraint4.h @@ -0,0 +1,49 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef ADL_CONSTRAINT4_H +#define ADL_CONSTRAINT4_H + + + +struct Constraint4 + { + _MEM_ALIGNED_ALLOCATOR16; + + float4 m_linear; + float4 m_worldPos[4]; + float4 m_center; // friction + float m_jacCoeffInv[4]; + float m_b[4]; + float m_appliedRambdaDt[4]; + + float m_fJacCoeffInv[2]; // friction + float m_fAppliedRambdaDt[2]; // friction + + u32 m_bodyA; + u32 m_bodyB; + + u32 m_batchIdx; + u32 m_paddings[1]; + + __inline + void setFrictionCoeff(float value) { m_linear.w = value; } + __inline + float getFrictionCoeff() const { return m_linear.w; } + }; + +#endif //ADL_CONSTRAINT4_H + \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlContact4.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlContact4.h new file mode 100644 index 000000000..29e36ade7 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlContact4.h @@ -0,0 +1,102 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef ADL_CONTACT4_H +#define ADL_CONTACT4_H + +#ifdef CL_PLATFORM_AMD +#include "AdlConstraint4.h" +#include "Adl/Adl.h" + +typedef adl::Buffer* SolverData; +#else +typedef void* SolverData; +#endif + +typedef void* ShapeDataType; + + +struct Contact4 +{ + _MEM_ALIGNED_ALLOCATOR16; + + float4 m_worldPos[4]; + float4 m_worldNormal; +// float m_restituitionCoeff; +// float m_frictionCoeff; + u16 m_restituitionCoeffCmp; + u16 m_frictionCoeffCmp; + int m_batchIdx; + + u32 m_bodyAPtr; + u32 m_bodyBPtr; + + // todo. make it safer + int& getBatchIdx() { return m_batchIdx; } + float getRestituitionCoeff() const { return ((float)m_restituitionCoeffCmp/(float)0xffff); } + void setRestituitionCoeff( float c ) { ADLASSERT( c >= 0.f && c <= 1.f ); m_restituitionCoeffCmp = (u16)(c*0xffff); } + float getFrictionCoeff() const { return ((float)m_frictionCoeffCmp/(float)0xffff); } + void setFrictionCoeff( float c ) { ADLASSERT( c >= 0.f && c <= 1.f ); m_frictionCoeffCmp = (u16)(c*0xffff); } + + float& getNPoints() { return m_worldNormal.w; } + float getNPoints() const { return m_worldNormal.w; } + + float getPenetration(int idx) const { return m_worldPos[idx].w; } + + bool isInvalid() const { return ((u32)m_bodyAPtr+(u32)m_bodyBPtr) == 0; } +}; + +struct ContactPoint4 + { + float4 m_worldPos[4]; + union + { + float4 m_worldNormal; + + struct Data + { + int m_padding[3]; + float m_nPoints; // for cl + }m_data; + + }; + float m_restituitionCoeff; + float m_frictionCoeff; +// int m_nPoints; +// int m_padding0; + + void* m_bodyAPtr; + void* m_bodyBPtr; +// int m_padding1; +// int m_padding2; + + float& getNPoints() { return m_data.m_nPoints; } + float getNPoints() const { return m_data.m_nPoints; } + + float getPenetration(int idx) const { return m_worldPos[idx].w; } + +// __inline +// void load(int idx, const ContactPoint& src); +// __inline +// void store(int 
idx, ContactPoint& dst) const; + + bool isInvalid() const { return ((u32)m_bodyAPtr+(u32)m_bodyBPtr) == 0; } + + }; + + +#endif //ADL_CONTACT4_H + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlError.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlError.h new file mode 100644 index 000000000..e1f9ad8e9 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlError.h @@ -0,0 +1,80 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef CL_ERROR_H +#define CL_ERROR_H + +#ifdef DX11RENDER +#include +#endif + +#ifdef _DEBUG + #include + #define CLASSERT(x) if(!(x)){__debugbreak(); } + #define ADLASSERT(x) if(!(x)){__debugbreak(); } +#else + #define CLASSERT(x) if(x){} + #define ADLASSERT(x) if(x){} + +#endif + + + + +#ifdef _DEBUG + #define COMPILE_TIME_ASSERT(x) {int compileTimeAssertFailed[x]; compileTimeAssertFailed[0];} +#else + #define COMPILE_TIME_ASSERT(x) +#endif + +#ifdef _DEBUG + #include + #include + __inline + void debugPrintf(const char *fmt, ...) 
+ { + va_list arg; + va_start(arg, fmt); +#ifdef DX11RENDER + char buf[256]; + vsprintf_s( buf, 256, fmt, arg ); +#ifdef UNICODE + WCHAR wbuf[256]; + int sizeWide = MultiByteToWideChar(0,0,buf,-1,wbuf,0); + MultiByteToWideChar(0,0,buf,-1,wbuf,sizeWide); + +// swprintf_s( wbuf, 256, L"%s", buf ); + OutputDebugString( wbuf ); +#else + OutputDebugString( buf ); +#endif +#else + vprintf(fmt, arg); +#endif + va_end(arg); + } +#else + __inline + void debugPrintf(const char *fmt, ...) + { + } +#endif + + +#define WARN(msg) debugPrintf("WARNING: %s\n", msg); + +#endif + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlMath.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlMath.h new file mode 100644 index 000000000..a72422047 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlMath.h @@ -0,0 +1,216 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#ifndef CL_MATH_H +#define CL_MATH_H + +#include +#include +#include +#include + + +#include "AdlError.h" +#include +#define pxSort std::sort + +#define PI 3.14159265358979323846f +#define NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment)) + + +#define _MEM_CLASSALIGN16 __declspec(align(16)) +#define _MEM_ALIGNED_ALLOCATOR16 void* operator new(size_t size) { return _aligned_malloc( size, 16 ); } \ + void operator delete(void *p) { _aligned_free( p ); } \ + void* operator new[](size_t size) { return _aligned_malloc( size, 16 ); } \ + void operator delete[](void *p) { _aligned_free( p ); } \ + void* operator new(size_t size, void* p) { return p; } \ + void operator delete(void *p, void* pp) {} + + + +template +T nextPowerOf2(T n) +{ + n -= 1; + for(int i=0; i>i); + return n+1; +} + + +_MEM_CLASSALIGN16 +struct float4 +{ + _MEM_ALIGNED_ALLOCATOR16; + union + { + struct + { + float x,y,z,w; + }; + struct + { + float s[4]; + }; + __m128 m_quad; + }; +}; + +__forceinline +unsigned int isZero(const float4& a) +{ + return (a.x == 0.f) & (a.y == 0.f) & (a.z == 0.f) & (a.w == 0.f); +} + +_MEM_CLASSALIGN16 +struct int4 +{ + _MEM_ALIGNED_ALLOCATOR16; + union + { + struct + { + int x,y,z,w; + }; + struct + { + int s[4]; + }; + }; +}; + +struct int2 +{ + union + { + struct + { + int x,y; + }; + struct + { + int s[2]; + }; + }; +}; + +struct float2 +{ + union + { + struct + { + float x,y; + }; + struct + { + float s[2]; + }; + }; +}; + + +typedef unsigned int u32; +typedef unsigned short u16; +typedef unsigned char u8; + + + +#include "Adlfloat4.inl" +//#include + + + + +template +void swap2(T& a, T& b) +{ + T tmp = a; + a = b; + b = tmp; +} + + +__inline +void randSeed(int seed) +{ + srand( seed ); +} + +template +__inline +T randRange(const T& minV, const T& maxV) +{ + float r = (rand()%10000)/10000.f; + T range = maxV - minV; + return (T)(minV + r*range); +} + +template<> +__inline 
+float4 randRange(const float4& minV, const float4& maxV) +{ + float4 r = make_float4( (rand()%10000)/10000.f, (rand()%10000)/10000.f, (rand()%10000)/10000.f, (rand()%10000)/10000.f ); + float4 range = maxV - minV; + return (minV + r*range); +} + + +struct SortData +{ + union + { + u32 m_key; + struct { u16 m_key16[2]; }; + }; + u32 m_value; + + friend bool operator <(const SortData& a, const SortData& b) + { + return a.m_key < b.m_key; + } +}; + + + +template +T* addByteOffset(void* baseAddr, u32 offset) +{ + return (T*)(((u32)baseAddr)+offset); +} + + +struct Pair32 +{ + Pair32(){} + Pair32(u32 a, u32 b) : m_a(a), m_b(b){} + + u32 m_a; + u32 m_b; +}; + +struct PtrPair +{ + PtrPair(){} + PtrPair(void* a, void* b) : m_a(a), m_b(b){} + template + PtrPair(T* a, T* b) : m_a((void*)a), m_b((void*)b){} + + void* m_a; + void* m_b; +}; + +#endif diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlMatrix3x3.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlMatrix3x3.h new file mode 100644 index 000000000..fbd82aac2 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlMatrix3x3.h @@ -0,0 +1,194 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef MATRIX3X3_H +#define MATRIX3X3_H + +#include "AdlMath.h" + +/////////////////////////////////////// +// Matrix3x3 +/////////////////////////////////////// + +typedef +_MEM_CLASSALIGN16 struct +{ + _MEM_ALIGNED_ALLOCATOR16; + float4 m_row[3]; +}Matrix3x3; + +__inline +Matrix3x3 mtZero(); + +__inline +Matrix3x3 mtIdentity(); + +__inline +Matrix3x3 mtDiagonal(float a, float b, float c); + +__inline +Matrix3x3 mtTranspose(const Matrix3x3& m); + +__inline +Matrix3x3 mtMul(const Matrix3x3& a, const Matrix3x3& b); + +__inline +float4 mtMul1(const Matrix3x3& a, const float4& b); + +__inline +Matrix3x3 mtMul2(float a, const Matrix3x3& b); + +__inline +float4 mtMul3(const float4& b, const Matrix3x3& a); + +__inline +Matrix3x3 mtInvert(const Matrix3x3& m); + +__inline +Matrix3x3 mtZero() +{ + Matrix3x3 m; + m.m_row[0] = make_float4(0.f); + m.m_row[1] = make_float4(0.f); + m.m_row[2] = make_float4(0.f); + return m; +} + +__inline +Matrix3x3 mtIdentity() +{ + Matrix3x3 m; + m.m_row[0] = make_float4(1,0,0); + m.m_row[1] = make_float4(0,1,0); + m.m_row[2] = make_float4(0,0,1); + return m; +} + +__inline +Matrix3x3 mtDiagonal(float a, float b, float c) +{ + Matrix3x3 m; + m.m_row[0] = make_float4(a,0,0); + m.m_row[1] = make_float4(0,b,0); + m.m_row[2] = make_float4(0,0,c); + return m; +} + +__inline +Matrix3x3 mtTranspose(const Matrix3x3& m) +{ + Matrix3x3 out; + out.m_row[0] = make_float4(m.m_row[0].s[0], m.m_row[1].s[0], m.m_row[2].s[0], 0.f); + out.m_row[1] = make_float4(m.m_row[0].s[1], m.m_row[1].s[1], m.m_row[2].s[1], 0.f); + out.m_row[2] = make_float4(m.m_row[0].s[2], m.m_row[1].s[2], m.m_row[2].s[2], 0.f); + return out; +} + +__inline +Matrix3x3 mtMul(const Matrix3x3& a, const Matrix3x3& b) +{ + Matrix3x3 transB; + transB = mtTranspose( b ); + Matrix3x3 ans; + for(int i=0; i<3; i++) + { + ans.m_row[i].s[0] = dot3F4(a.m_row[i],transB.m_row[0]); + 
ans.m_row[i].s[1] = dot3F4(a.m_row[i],transB.m_row[1]); + ans.m_row[i].s[2] = dot3F4(a.m_row[i],transB.m_row[2]); + } + return ans; +} + +__inline +float4 mtMul1(const Matrix3x3& a, const float4& b) +{ + float4 ans; + ans.s[0] = dot3F4( a.m_row[0], b ); + ans.s[1] = dot3F4( a.m_row[1], b ); + ans.s[2] = dot3F4( a.m_row[2], b ); + return ans; +} + +__inline +Matrix3x3 mtMul2(float a, const Matrix3x3& b) +{ + Matrix3x3 ans; + ans.m_row[0] = a*b.m_row[0]; + ans.m_row[1] = a*b.m_row[1]; + ans.m_row[2] = a*b.m_row[2]; + return ans; +} + +__inline +float4 mtMul3(const float4& a, const Matrix3x3& b) +{ + float4 ans; + ans.x = a.x*b.m_row[0].x + a.y*b.m_row[1].x + a.z*b.m_row[2].x; + ans.y = a.x*b.m_row[0].y + a.y*b.m_row[1].y + a.z*b.m_row[2].y; + ans.z = a.x*b.m_row[0].z + a.y*b.m_row[1].z + a.z*b.m_row[2].z; + return ans; +} + +__inline +Matrix3x3 mtInvert(const Matrix3x3& m) +{ + float det = m.m_row[0].s[0]*m.m_row[1].s[1]*m.m_row[2].s[2]+m.m_row[1].s[0]*m.m_row[2].s[1]*m.m_row[0].s[2]+m.m_row[2].s[0]*m.m_row[0].s[1]*m.m_row[1].s[2] + -m.m_row[0].s[0]*m.m_row[2].s[1]*m.m_row[1].s[2]-m.m_row[2].s[0]*m.m_row[1].s[1]*m.m_row[0].s[2]-m.m_row[1].s[0]*m.m_row[0].s[1]*m.m_row[2].s[2]; + + CLASSERT( det ); + + Matrix3x3 ans; + ans.m_row[0].s[0] = m.m_row[1].s[1]*m.m_row[2].s[2] - m.m_row[1].s[2]*m.m_row[2].s[1]; + ans.m_row[0].s[1] = m.m_row[0].s[2]*m.m_row[2].s[1] - m.m_row[0].s[1]*m.m_row[2].s[2]; + ans.m_row[0].s[2] = m.m_row[0].s[1]*m.m_row[1].s[2] - m.m_row[0].s[2]*m.m_row[1].s[1]; + ans.m_row[0].w = 0.f; + + ans.m_row[1].s[0] = m.m_row[1].s[2]*m.m_row[2].s[0] - m.m_row[1].s[0]*m.m_row[2].s[2]; + ans.m_row[1].s[1] = m.m_row[0].s[0]*m.m_row[2].s[2] - m.m_row[0].s[2]*m.m_row[2].s[0]; + ans.m_row[1].s[2] = m.m_row[0].s[2]*m.m_row[1].s[0] - m.m_row[0].s[0]*m.m_row[1].s[2]; + ans.m_row[1].w = 0.f; + + ans.m_row[2].s[0] = m.m_row[1].s[0]*m.m_row[2].s[1] - m.m_row[1].s[1]*m.m_row[2].s[0]; + ans.m_row[2].s[1] = m.m_row[0].s[1]*m.m_row[2].s[0] - m.m_row[0].s[0]*m.m_row[2].s[1]; + 
ans.m_row[2].s[2] = m.m_row[0].s[0]*m.m_row[1].s[1] - m.m_row[0].s[1]*m.m_row[1].s[0]; + ans.m_row[2].w = 0.f; + + ans = mtMul2((1.0f/det), ans); + return ans; +} + +__inline +Matrix3x3 mtSet( const float4& a, const float4& b, const float4& c ) +{ + Matrix3x3 m; + m.m_row[0] = a; + m.m_row[1] = b; + m.m_row[2] = c; + return m; +} + +__inline +Matrix3x3 operator+(const Matrix3x3& a, const Matrix3x3& b) +{ + Matrix3x3 out; + out.m_row[0] = a.m_row[0] + b.m_row[0]; + out.m_row[1] = a.m_row[1] + b.m_row[1]; + out.m_row[2] = a.m_row[2] + b.m_row[2]; + return out; +} + +#endif + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlQuaternion.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlQuaternion.h new file mode 100644 index 000000000..979e5fec5 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlQuaternion.h @@ -0,0 +1,155 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#ifndef QUATERNION_H +#define QUATERNION_H + +#include "AdlMatrix3x3.h" + + +typedef float4 Quaternion; + +__inline +Quaternion qtSet(const float4& axis, float angle); + +__inline +Quaternion qtMul(const Quaternion& a, const Quaternion& b); + +__inline +float4 qtRotate(const Quaternion& q, const float4& vec); + +__inline +float4 qtInvRotate(const Quaternion& q, const float4& vec); + +__inline +Quaternion qtInvert(const Quaternion& q); + +__inline +Matrix3x3 qtGetRotationMatrix(const Quaternion& quat); + +__inline +Quaternion qtNormalize(const Quaternion& q); + +__inline +Quaternion qtGetIdentity() { return make_float4(0,0,0,1); } + +__inline +Quaternion qtSet(const float4& axis, float angle) +{ + float4 nAxis = normalize3( axis ); + + Quaternion q; + q.s[0] = nAxis.s[0]*sin(angle/2); + q.s[1] = nAxis.s[1]*sin(angle/2); + q.s[2] = nAxis.s[2]*sin(angle/2); + q.s[3] = cos(angle/2); + return q; +} + +__inline +Quaternion qtMul(const Quaternion& a, const Quaternion& b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.s[3]*b + b.s[3]*a; + ans.s[3] = a.s[3]*b.s[3] - (a.s[0]*b.s[0]+a.s[1]*b.s[1]+a.s[2]*b.s[2]); + return ans; +} + +__inline +float4 qtRotate(const Quaternion& q, const float4& vec) +{ + Quaternion vecQ = vec; + vecQ.s[3] = 0.f; + Quaternion qInv = qtInvert( q ); + float4 out = qtMul(qtMul(q,vecQ),qInv); + return out; +} + +__inline +float4 qtInvRotate(const Quaternion& q, const float4& vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + +__inline +Quaternion qtInvert(const Quaternion& q) +{ + Quaternion ans; + ans.s[0] = -q.s[0]; + ans.s[1] = -q.s[1]; + ans.s[2] = -q.s[2]; + ans.s[3] = q.s[3]; + return ans; +} + +__inline +Matrix3x3 qtGetRotationMatrix(const Quaternion& quat) +{ + float4 quat2 = make_float4(quat.s[0]*quat.s[0], quat.s[1]*quat.s[1], quat.s[2]*quat.s[2], 0.f); + Matrix3x3 out; + + out.m_row[0].s[0]=1-2*quat2.s[1]-2*quat2.s[2]; + 
out.m_row[0].s[1]=2*quat.s[0]*quat.s[1]-2*quat.s[3]*quat.s[2]; + out.m_row[0].s[2]=2*quat.s[0]*quat.s[2]+2*quat.s[3]*quat.s[1]; + out.m_row[0].s[3] = 0.f; + + out.m_row[1].s[0]=2*quat.s[0]*quat.s[1]+2*quat.s[3]*quat.s[2]; + out.m_row[1].s[1]=1-2*quat2.s[0]-2*quat2.s[2]; + out.m_row[1].s[2]=2*quat.s[1]*quat.s[2]-2*quat.s[3]*quat.s[0]; + out.m_row[1].s[3] = 0.f; + + out.m_row[2].s[0]=2*quat.s[0]*quat.s[2]-2*quat.s[3]*quat.s[1]; + out.m_row[2].s[1]=2*quat.s[1]*quat.s[2]+2*quat.s[3]*quat.s[0]; + out.m_row[2].s[2]=1-2*quat2.s[0]-2*quat2.s[1]; + out.m_row[2].s[3] = 0.f; + + return out; +} + +__inline +Quaternion qtGetQuaternion(const Matrix3x3* m) +{ + Quaternion q; + q.w = sqrtf( m[0].m_row[0].x + m[0].m_row[1].y + m[0].m_row[2].z + 1 ) * 0.5f; + float inv4w = 1.f/(4.f*q.w); + q.x = (m[0].m_row[2].y-m[0].m_row[1].z)*inv4w; + q.y = (m[0].m_row[0].z-m[0].m_row[2].x)*inv4w; + q.z = (m[0].m_row[1].x-m[0].m_row[0].y)*inv4w; + + return q; +} + +__inline +Quaternion qtNormalize(const Quaternion& q) +{ + return normalize4(q); +} + +__inline +float4 transform(const float4& p, const float4& translation, const Quaternion& orientation) +{ + return qtRotate( orientation, p ) + translation; +} + +__inline +float4 invTransform(const float4& p, const float4& translation, const Quaternion& orientation) +{ + return qtRotate( qtInvert( orientation ), p-translation ); // use qtInvRotate +} + +#endif + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlRigidBody.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlRigidBody.h new file mode 100644 index 000000000..b374cd032 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlRigidBody.h @@ -0,0 +1,59 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+//Originally written by Takahiro Harada
+
+
+#ifndef ADL_RIGID_BODY_H
+#define ADL_RIGID_BODY_H
+
+#include "AdlQuaternion.h"
+
+// Container for the plain-data records that describe a rigid body.
+// It declares no members of its own, only the nested Body and Inertia types.
+class RigidBodyBase
+{
+    public:
+
+        // Per-body state record. The type is 16-byte aligned and carries the
+        // aligned new/delete operators from _MEM_ALIGNED_ALLOCATOR16.
+        _MEM_CLASSALIGN16
+        struct Body
+        {
+            _MEM_ALIGNED_ALLOCATOR16;
+
+            // NOTE(review): judging by the names these are position, orientation,
+            // linear velocity and angular velocity -- the frame and units are not
+            // visible in this header; confirm against the integrator.
+            float4 m_pos;
+            Quaternion m_quat;
+            float4 m_linVel;
+            float4 m_angVel;
+
+            // NOTE(review): presumably an index into a shape buffer plus a shape
+            // kind tag; the valid values are defined elsewhere.
+            u32 m_shapeIdx;
+            u32 m_shapeType;
+
+            // NOTE(review): presumably 1/mass (0 would then mean a static body)
+            // -- confirm. "restituition" is misspelled but is part of the
+            // interface, so it is left as is.
+            float m_invMass;
+            float m_restituitionCoeff;
+            float m_frictionCoeff;
+
+        };
+
+        // Per-body inverse inertia matrices. Not declared with 16-byte alignment
+        // itself; its Matrix3x3 members carry their own alignment.
+        struct Inertia
+        {
+/*          u16 m_shapeType;
+            u16 m_shapeIdx;
+            float m_restituitionCoeff;
+            float m_frictionCoeff;
+            int m_padding;
+*/
+            // NOTE(review): by the names, the current inverse inertia and the
+            // initial one it is derived from -- confirm.
+            Matrix3x3 m_invInertia;
+            Matrix3x3 m_initInvInertia;
+        };
+};
+
+#endif// ADL_RIGID_BODY_H
+
diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlTransform.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlTransform.h
new file mode 100644
index 000000000..d9464babf
--- /dev/null
+++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlTransform.h
@@ -0,0 +1,61 @@
+/*
+Copyright (c) 2012 Advanced Micro Devices, Inc.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef _ADL_TRANSFORM_H +#define _ADL_TRANSFORM_H + +#include "AdlMath.h" +#include "AdlQuaternion.h" +#include "AdlMatrix3x3.h" + +struct Transform +{ + float4 m_translation; + Matrix3x3 m_rotation; +}; + +Transform trSetTransform(const float4& translation, const Quaternion& quat) +{ + Transform tr; + tr.m_translation = translation; + tr.m_rotation = qtGetRotationMatrix( quat ); + return tr; +} + +Transform trInvert( const Transform& tr ) +{ + Transform ans; + ans.m_rotation = mtTranspose( tr.m_rotation ); + ans.m_translation = mtMul1( ans.m_rotation, -tr.m_translation ); + return ans; +} + +Transform trMul(const Transform& trA, const Transform& trB) +{ + Transform ans; + ans.m_rotation = mtMul( trA.m_rotation, trB.m_rotation ); + ans.m_translation = mtMul1( trA.m_rotation, trB.m_translation ) + trA.m_translation; + return ans; +} + +float4 trMul1(const Transform& tr, const float4& p) +{ + return mtMul1( tr.m_rotation, p ) + tr.m_translation; +} + + +#endif //_ADL_TRANSFORM_H + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Adlfloat4.inl b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Adlfloat4.inl new file mode 100644 index 000000000..4e98a087a --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Adlfloat4.inl @@ -0,0 +1,373 @@ +/* 
+Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +//#define CHECK_ALIGNMENT(a) CLASSERT((u32(&(a)) & 0xf) == 0); +#define CHECK_ALIGNMENT(a) a; + + +__inline +float4 make_float4(float x, float y, float z, float w = 0.f) +{ + float4 v; + v.x = x; v.y = y; v.z = z; v.w = w; + return v; +} + +__inline +float4 make_float4(float x) +{ + return make_float4(x,x,x,x); +} + +__inline +float4 make_float4(const int4& x) +{ + return make_float4((float)x.s[0], (float)x.s[1], (float)x.s[2], (float)x.s[3]); +} + +__inline +float2 make_float2(float x, float y) +{ + float2 v; + v.s[0] = x; v.s[1] = y; + return v; +} + +__inline +float2 make_float2(float x) +{ + return make_float2(x,x); +} + +__inline +float2 make_float2(const int2& x) +{ + return make_float2((float)x.s[0], (float)x.s[1]); +} + +__inline +int4 make_int4(int x, int y, int z, int w = 0) +{ + int4 v; + v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w; + return v; +} + +__inline +int4 make_int4(int x) +{ + return make_int4(x,x,x,x); +} + +__inline +int4 make_int4(const float4& x) +{ + return make_int4((int)x.x, (int)x.y, (int)x.z, (int)x.w); +} + +__inline +int2 
make_int2(int a, int b) +{ + int2 ans; ans.x = a; ans.y = b; + return ans; +} + +__inline +float4 operator-(const float4& a) +{ + return make_float4(-a.x, -a.y, -a.z, -a.w); +} + +__inline +float4 operator*(const float4& a, const float4& b) +{ + CLASSERT((u32(&a) & 0xf) == 0); + + float4 out; + out.s[0] = a.s[0]*b.s[0]; + out.s[1] = a.s[1]*b.s[1]; + out.s[2] = a.s[2]*b.s[2]; + out.s[3] = a.s[3]*b.s[3]; + return out; +} + +__inline +float4 operator*(float a, const float4& b) +{ + return make_float4(a*b.s[0], a*b.s[1], a*b.s[2], a*b.s[3]); +} + +__inline +float4 operator*(const float4& b, float a) +{ + CHECK_ALIGNMENT(b); + + return make_float4(a*b.s[0], a*b.s[1], a*b.s[2], a*b.s[3]); +} + +__inline +void operator*=(float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]*=b.s[0]; + a.s[1]*=b.s[1]; + a.s[2]*=b.s[2]; + a.s[3]*=b.s[3]; +} + +__inline +void operator*=(float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]*=b; + a.s[1]*=b; + a.s[2]*=b; + a.s[3]*=b; +} + +// +__inline +float4 operator/(const float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.s[0] = a.s[0]/b.s[0]; + out.s[1] = a.s[1]/b.s[1]; + out.s[2] = a.s[2]/b.s[2]; + out.s[3] = a.s[3]/b.s[3]; + return out; +} + +__inline +float4 operator/(const float4& b, float a) +{ + CHECK_ALIGNMENT(b); + + return make_float4(b.s[0]/a, b.s[1]/a, b.s[2]/a, b.s[3]/a); +} + +__inline +void operator/=(float4& a, const float4& b) +{ + a.s[0]/=b.s[0]; + a.s[1]/=b.s[1]; + a.s[2]/=b.s[2]; + a.s[3]/=b.s[3]; +} + +__inline +void operator/=(float4& a, float b) +{ + CLASSERT((u32(&a) & 0xf) == 0); + + a.s[0]/=b; + a.s[1]/=b; + a.s[2]/=b; + a.s[3]/=b; +} +// + +__inline +float4 operator+(const float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.s[0] = a.s[0]+b.s[0]; + out.s[1] = a.s[1]+b.s[1]; + out.s[2] = a.s[2]+b.s[2]; + out.s[3] = a.s[3]+b.s[3]; + return out; +} + +__inline +float4 operator+(const float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.s[0] = 
a.s[0]+b; + out.s[1] = a.s[1]+b; + out.s[2] = a.s[2]+b; + out.s[3] = a.s[3]+b; + return out; +} + +__inline +float4 operator-(const float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.s[0] = a.s[0]-b.s[0]; + out.s[1] = a.s[1]-b.s[1]; + out.s[2] = a.s[2]-b.s[2]; + out.s[3] = a.s[3]-b.s[3]; + return out; +} + +__inline +float4 operator-(const float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.s[0] = a.s[0]-b; + out.s[1] = a.s[1]-b; + out.s[2] = a.s[2]-b; + out.s[3] = a.s[3]-b; + return out; +} + +__inline +void operator+=(float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]+=b.s[0]; + a.s[1]+=b.s[1]; + a.s[2]+=b.s[2]; + a.s[3]+=b.s[3]; +} + +__inline +void operator+=(float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]+=b; + a.s[1]+=b; + a.s[2]+=b; + a.s[3]+=b; +} + +__inline +void operator-=(float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]-=b.s[0]; + a.s[1]-=b.s[1]; + a.s[2]-=b.s[2]; + a.s[3]-=b.s[3]; +} + +__inline +void operator-=(float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]-=b; + a.s[1]-=b; + a.s[2]-=b; + a.s[3]-=b; +} + + + + + +__inline +float4 cross3(const float4& a, const float4& b) +{ + return make_float4(a.s[1]*b.s[2]-a.s[2]*b.s[1], + a.s[2]*b.s[0]-a.s[0]*b.s[2], + a.s[0]*b.s[1]-a.s[1]*b.s[0], + 0); +} + +__inline +float dot3F4(const float4& a, const float4& b) +{ + return a.x*b.x+a.y*b.y+a.z*b.z; +} + +__inline +float length3(const float4& a) +{ + return sqrtf(dot3F4(a,a)); +} + +__inline +float dot4(const float4& a, const float4& b) +{ + return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w; +} + +// for height +__inline +float dot3w1(const float4& point, const float4& eqn) +{ + return point.x*eqn.x+point.y*eqn.y+point.z*eqn.z+eqn.w; +} + +__inline +float4 normalize3(const float4& a) +{ + float length = sqrtf(dot3F4(a, a)); + return 1.f/length * a; +} + +__inline +float4 normalize4(const float4& a) +{ + float length = sqrtf(dot4(a, a)); + return 1.f/length * a; +} + +__inline +float4 
createEquation(const float4& a, const float4& b, const float4& c) +{ + float4 eqn; + float4 ab = b-a; + float4 ac = c-a; + eqn = normalize3( cross3(ab, ac) ); + eqn.w = -dot3F4(eqn,a); + return eqn; +} + + +template +__inline +T max2(const T& a, const T& b) +{ + return (a>b)? a:b; +} + +template +__inline +T min2(const T& a, const T& b) +{ + return (a +__inline +float4 max2(const float4& a, const float4& b) +{ + return make_float4( max2(a.x,b.x), max2(a.y,b.y), max2(a.z,b.z), max2(a.w,b.w) ); +} + +template<> +__inline +float4 min2(const float4& a, const float4& b) +{ + return make_float4( min2(a.x,b.x), min2(a.y,b.y), min2(a.z,b.z), min2(a.w,b.w) ); +} + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Adlfloat4SSE.inl b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Adlfloat4SSE.inl new file mode 100644 index 000000000..a10211e06 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Adlfloat4SSE.inl @@ -0,0 +1,381 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +//#define CHECK_ALIGNMENT(a) CLASSERT((u32(&(a)) & 0xf) == 0); +#define CHECK_ALIGNMENT(a) a; + + +__inline +float4 make_float4(float x, float y, float z, float w = 0.f) +{ + float4 v; + v.m_quad = _mm_set_ps(w,z,y,x); + + return v; +} + +__inline +float4 make_float4(float x) +{ + return make_float4(x,x,x,x); +} + +__inline +float4 make_float4(const int4& x) +{ + return make_float4((float)x.s[0], (float)x.s[1], (float)x.s[2], (float)x.s[3]); +} + +__inline +float2 make_float2(float x, float y) +{ + float2 v; + v.s[0] = x; v.s[1] = y; + return v; +} + +__inline +float2 make_float2(float x) +{ + return make_float2(x,x); +} + +__inline +float2 make_float2(const int2& x) +{ + return make_float2((float)x.s[0], (float)x.s[1]); +} + +__inline +int4 make_int4(int x, int y, int z, int w = 0) +{ + int4 v; + v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w; + return v; +} + +__inline +int4 make_int4(int x) +{ + return make_int4(x,x,x,x); +} + +__inline +int4 make_int4(const float4& x) +{ + return make_int4((int)x.x, (int)x.y, (int)x.z, (int)x.w); +} + +__inline +int2 make_int2(int a, int b) +{ + int2 ans; ans.x = a; ans.y = b; + return ans; +} + +__inline +float4 operator-(const float4& a) +{ + float4 zero; zero.m_quad = _mm_setzero_ps(); + float4 ans; ans.m_quad = _mm_sub_ps( zero.m_quad, a.m_quad ); + return ans; +} + +__inline +float4 operator*(const float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.m_quad = _mm_mul_ps( a.m_quad, b.m_quad ); + return out; +} + +__inline +float4 operator*(float a, const float4& b) +{ + float4 av; av.m_quad = _mm_set1_ps( a ); + return av*b; +} + +__inline +float4 operator*(const float4& b, float a) +{ + CHECK_ALIGNMENT(b); + + float4 av; av.m_quad = _mm_set1_ps( a ); + return av*b; +} + +__inline +void operator*=(float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + a = a*b; +} + +__inline +void operator*=(float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + float4 bv; 
bv.m_quad = _mm_set1_ps( b ); + a = a*bv; +} + +// +__inline +float4 operator/(const float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.m_quad = _mm_div_ps( a.m_quad, b.m_quad ); + return out; +} + +__inline +float4 operator/(const float4& b, float a) +{ + CHECK_ALIGNMENT(b); + + float4 av; av.m_quad = _mm_set1_ps( a ); + float4 out; + out = b/av; + return out; +} + +__inline +void operator/=(float4& a, const float4& b) +{ + a = a/b; +} + +__inline +void operator/=(float4& a, float b) +{ + CLASSERT((u32(&a) & 0xf) == 0); + + float4 bv; bv.m_quad = _mm_set1_ps( b ); + a = a/bv; +} +// + +__inline +float4 operator+(const float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.m_quad = _mm_add_ps( a.m_quad, b.m_quad ); + return out; +} + +__inline +float4 operator+(const float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + float4 bv; bv.m_quad = _mm_set1_ps( b ); + return a+bv; +} + +__inline +float4 operator-(const float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.m_quad = _mm_sub_ps( a.m_quad, b.m_quad ); + return out; +} + +__inline +float4 operator-(const float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + float4 bv; bv.m_quad = _mm_set1_ps( b ); + return a-bv; +} + +__inline +void operator+=(float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + a = a + b; +} + +__inline +void operator+=(float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + float4 bv; bv.m_quad = _mm_set1_ps( b ); + + a = a + bv; +} + +__inline +void operator-=(float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + a = a - b; +} + +__inline +void operator-=(float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + float4 bv; bv.m_quad = _mm_set1_ps( b ); + + a = a - bv; +} + + + + + +__inline +float4 cross3(const float4& a, const float4& b) +{ // xnamathvector.inl + union IntVec + { + unsigned int m_i[4]; + __m128 m_v; + }; + + IntVec mask3 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000}; + __m128 V1 = a.m_quad; + __m128 V2 = b.m_quad; + + 
__m128 vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1)); + // z2,x2,y2,w2 + __m128 vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2)); + // Perform the left operation + __m128 vResult = _mm_mul_ps(vTemp1,vTemp2); + // z1,x1,y1,w1 + vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1)); + // y2,z2,x2,w2 + vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2)); + // Perform the right operation + vTemp1 = _mm_mul_ps(vTemp1,vTemp2); + // Subract the right from left, and return answer + vResult = _mm_sub_ps(vResult,vTemp1); + // Set w to zero + float4 ans; ans.m_quad = _mm_and_ps(vResult,mask3.m_v); + return ans; +} + +__inline +float dot3F4(const float4& a, const float4& b) +{ +// return a.x*b.x+a.y*b.y+a.z*b.z; + // Perform the dot product + __m128 V1 = a.m_quad; + __m128 V2 = b.m_quad; + + __m128 vDot = _mm_mul_ps(V1,V2); + // x=Dot.vector4_f32[1], y=Dot.vector4_f32[2] + __m128 vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1)); + // Result.vector4_f32[0] = x+y + vDot = _mm_add_ss(vDot,vTemp); + // x=Dot.vector4_f32[2] + vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1)); + // Result.vector4_f32[0] = (x+y)+z + vDot = _mm_add_ss(vDot,vTemp); + // Splat x + float4 ans; ans.m_quad = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0)); + return ans.x; +} + +__inline +float length3(const float4& a) +{ + return sqrtf(dot3F4(a,a)); +} + +__inline +float dot4(const float4& a, const float4& b) +{ + return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w; +} + +// for height +__inline +float dot3w1(const float4& point, const float4& eqn) +{ + return point.x*eqn.x+point.y*eqn.y+point.z*eqn.z+eqn.w; +} + +__inline +float4 normalize3(const float4& a) +{ + float length = sqrtf(dot3F4(a, a)); + return 1.f/length * a; +} + +__inline +float4 normalize4(const float4& a) +{ + float length = sqrtf(dot4(a, a)); + return 1.f/length * a; +} + +__inline +float4 createEquation(const float4& a, const float4& b, const float4& c) +{ + float4 eqn; + float4 ab = b-a; + float4 ac = c-a; + eqn = 
normalize3( cross3(ab, ac) ); + eqn.w = -dot3F4(eqn,a); + return eqn; +} + + +template +__inline +T max2(const T& a, const T& b) +{ + return (a>b)? a:b; +} + +template +__inline +T min2(const T& a, const T& b) +{ + return (a +__inline +float4 max2(const float4& a, const float4& b) +{ + return make_float4( max2(a.x,b.x), max2(a.y,b.y), max2(a.z,b.z), max2(a.w,b.w) ); +} + +template<> +__inline +float4 min2(const float4& a, const float4& b) +{ + return make_float4( min2(a.x,b.x), min2(a.y,b.y), min2(a.z,b.z), min2(a.w,b.w) ); +} + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowPhase.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowPhase.h new file mode 100644 index 000000000..4ad551f51 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowPhase.h @@ -0,0 +1,154 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#pragma once + +#include +//#include + +#include "AdlMath.h" +#include "AdlContact4.h" +#include "AdlRigidBody.h" + +#include "../ConvexHeightFieldShape.h" + +//#include "TypeDefinition.h" +//#include "RigidBody.h" +//#include "ConvexHeightFieldShape.h" + +namespace adl +{ +class ShapeBase; + +class ChNarrowphaseBase +{ + public: + struct Config + { + float m_collisionMargin; + }; +/* + typedef struct + { + // m_normal.w == height in u8 + float4 m_normal[HEIGHT_RES*HEIGHT_RES*6]; + u32 m_height4[HEIGHT_RES*HEIGHT_RES*6]; + + float m_scale; + float m_padding0; + float m_padding1; + float m_padding2; + } ShapeData; +*/ +}; + +template +class ChNarrowphase : public ChNarrowphaseBase +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + struct Data + { + const Device* m_device; + Kernel* m_supportCullingKernel; + Kernel* m_narrowphaseKernel; + Kernel* m_narrowphaseWithPlaneKernel; + + Buffer* m_counterBuffer; + }; + + enum + { + N_TASKS = 4, + HEIGHT_RES = ConvexHeightField::HEIGHT_RES, + }; + + struct ShapeData + { + float4 m_normal[HEIGHT_RES*HEIGHT_RES*6]; + u32 m_height4[HEIGHT_RES*HEIGHT_RES*6]; + u32 m_supportHeight4[HEIGHT_RES*HEIGHT_RES*6]; + + float m_scale; + float m_padding0; + float m_padding1; + float m_padding2; + }; + + struct ConstData + { + int m_nPairs; + float m_collisionMargin; + int m_capacity; + int m_paddings[1]; + }; + + static + Data* allocate( const Device* device ); + + static + void deallocate( Data* data ); +/* + static + Buffer* allocateShapeBuffer( const Device* device, int capacity ); + + static + void deallocateShapeBuffer( Buffer* shapeBuf ); + + static + void setShape( Buffer* shapeBuf, ShapeBase* shape, int idx, float collisionMargin ); +*/ + static + ShapeDataType allocateShapeBuffer( const Device* device, int capacity ); + + static + void deallocateShapeBuffer( ShapeDataType shapeBuf ); + + static + void setShape( ShapeDataType shapeBuf, ShapeBase* shape, int idx, float 
collisionMargin = 0.f ); + + static + void setShape( ShapeDataType shapeBuf, ConvexHeightField* cvxShape, int idx, float collisionMargin = 0.f ); + + // Run NarrowphaseKernel + //template + static + void execute( Data* data, const Buffer* pairs, int nPairs, + const Buffer* bodyBuf, const ShapeDataType shapeBuf, + Buffer* contactOut, int& nContacts, const Config& cfg ); + + // Run NarrowphaseWithPlaneKernel + //template + static + void execute( Data* data, const Buffer* pairs, int nPairs, + const Buffer* bodyBuf, const ShapeDataType shapeBuf, + const Buffer* vtxBuf, const Buffer* idxBuf, + Buffer* contactOut, int& nContacts, const Config& cfg ); + + // Run SupportCullingKernel + //template + static + int culling( Data* data, const Buffer* pairs, int nPairs, const Buffer* bodyBuf, + const ShapeDataType shapeBuf, const Buffer* pairsOut, const Config& cfg ); +}; + +//#include +//#include + +#include "ChNarrowphase.inl" + +}; diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowphase.inl b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowphase.inl new file mode 100644 index 000000000..00ffbda24 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowphase.inl @@ -0,0 +1,303 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +//#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\ChNarrowphaseKernels" +#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\ChNarrowphaseKernels" +#define KERNEL0 "SupportCullingKernel" +#define KERNEL1 "NarrowphaseKernel" + +#include "ChNarrowphaseKernels.h" + +class ChNarrowphaseImp +{ +public: + static + __inline + u32 u32Pack(u8 x, u8 y, u8 z, u8 w) + { + return (x) | (y<<8) | (z<<16) | (w<<24); + } + +}; + +template +typename ChNarrowphase::Data* ChNarrowphase::allocate( const Device* device ) +{ + char options[100]; + + const char* src[] = +#if defined(ADL_LOAD_KERNEL_FROM_STRING) + {narrowphaseKernelsCL, 0}; +#else + {0,0}; +#endif + + + + + //sprintf(options, "-I ..\\..\\ -Wf,--c++"); + sprintf(options, "-I .\\NarrowPhaseCL\\"); + + Data* data = new Data; + data->m_device = device; + data->m_supportCullingKernel = device->getKernel( PATH, KERNEL0, options,src[TYPE] ); + data->m_narrowphaseKernel = device->getKernel( PATH, KERNEL1, options, src[TYPE]); + data->m_narrowphaseWithPlaneKernel = device->getKernel( PATH, "NarrowphaseWithPlaneKernel", options,src[TYPE]); + data->m_counterBuffer = new Buffer( device, 1 ); + + return data; +} + + +template +void ChNarrowphase::deallocate( Data* data ) +{ + delete data->m_counterBuffer; + + delete data; +} + +template +ShapeDataType ChNarrowphase::allocateShapeBuffer( const Device* device, int capacity ) +{ + ADLASSERT( device->m_type == TYPE ); + + return new Buffer( device, capacity ); +} + +template +void ChNarrowphase::deallocateShapeBuffer( ShapeDataType shapeBuf ) +{ + Buffer* s = (Buffer*)shapeBuf; + delete s; +} + +template +void ChNarrowphase::setShape( ShapeDataType shapeBuf, ShapeBase* shape, int idx, float collisionMargin ) +{ + ConvexHeightField* cvxShape = new 
ConvexHeightField( shape ); + Buffer* dst = (Buffer*)shapeBuf; + cvxShape->m_aabb.expandBy( make_float4( collisionMargin ) ); + { + ShapeData s; + { + for(int j=0; jm_normal[j]; + } + for(int j=0; jm_data[4*j], cvxShape->m_data[4*j+1], cvxShape->m_data[4*j+2], cvxShape->m_data[4*j+3] ); + s.m_supportHeight4[j] = ChNarrowphaseImp::u32Pack( cvxShape->m_supportHeight[4*j], cvxShape->m_supportHeight[4*j+1], cvxShape->m_supportHeight[4*j+2], cvxShape->m_supportHeight[4*j+3] ); + } + s.m_scale = cvxShape->m_scale; + } + dst->write( &s, 1, idx ); + DeviceUtils::waitForCompletion( dst->m_device ); + } + delete cvxShape; +} + +template +void ChNarrowphase::setShape( ShapeDataType shapeBuf, ConvexHeightField* cvxShape, int idx, float collisionMargin ) +{ + Buffer* dst = (Buffer*)shapeBuf; + cvxShape->m_aabb.expandBy( make_float4( collisionMargin ) ); + { + ShapeData s; + { + for(int j=0; jm_normal[j]; + } + for(int j=0; jm_data[4*j], cvxShape->m_data[4*j+1], cvxShape->m_data[4*j+2], cvxShape->m_data[4*j+3] ); + s.m_supportHeight4[j] = ChNarrowphaseImp::u32Pack( cvxShape->m_supportHeight[4*j], cvxShape->m_supportHeight[4*j+1], cvxShape->m_supportHeight[4*j+2], cvxShape->m_supportHeight[4*j+3] ); + } + s.m_scale = cvxShape->m_scale; + } + dst->write( &s, 1, idx ); + DeviceUtils::waitForCompletion( dst->m_device ); + } +} + +// Run NarrowphaseKernel +template +//template +void ChNarrowphase::execute( Data* data, const Buffer* pairs, int nPairs, const Buffer* bodyBuf, + const ShapeDataType shapeBuf, + Buffer* contactOut, int& nContacts, const Config& cfg ) +{ + if( nPairs == 0 ) return; + + Buffer* shapeBuffer = (Buffer*)shapeBuf; + ADLASSERT( shapeBuffer->getType() == TYPE ); + + const Device* device = data->m_device; + + Buffer* gPairsInNative + = BufferUtils::map( data->m_device, pairs ); + Buffer* gBodyInNative + = BufferUtils::map( data->m_device, bodyBuf ); + Buffer* gContactOutNative + = BufferUtils::map( data->m_device, contactOut ); // this might not be empty + + Buffer 
constBuffer( device, 1, BufferBase::BUFFER_CONST ); + + ConstData cdata; + cdata.m_nPairs = nPairs; + cdata.m_collisionMargin = cfg.m_collisionMargin; + cdata.m_capacity = contactOut->getSize() - nContacts; + + u32 n = nContacts; + data->m_counterBuffer->write( &n, 1 ); +// DeviceUtils::waitForCompletion( device ); + + { + BufferInfo bInfo[] = { BufferInfo( gPairsInNative, true ), BufferInfo( shapeBuffer ), BufferInfo( gBodyInNative ), + BufferInfo( gContactOutNative ), + BufferInfo( data->m_counterBuffer ) }; + Launcher launcher( data->m_device, data->m_narrowphaseKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( nPairs*64, 64 ); + } + + data->m_counterBuffer->read( &n, 1 ); + DeviceUtils::waitForCompletion( device ); + + BufferUtils::unmap( gPairsInNative, pairs ); + BufferUtils::unmap( gBodyInNative, bodyBuf ); + BufferUtils::unmap( gContactOutNative, contactOut ); + + nContacts = min2((int)n, contactOut->getSize() ); +} + +// Run NarrowphaseWithPlaneKernel +template +//template +void ChNarrowphase::execute( Data* data, const Buffer* pairs, int nPairs, + const Buffer* bodyBuf, const ShapeDataType shapeBuf, + const Buffer* vtxBuf, const Buffer* idxBuf, + Buffer* contactOut, int& nContacts, const Config& cfg ) +{ + if( nPairs == 0 ) return; + + Buffer* shapeBuffer = (Buffer*)shapeBuf; + ADLASSERT( shapeBuffer->getType() == TYPE ); + + const Device* device = data->m_device; + + Buffer* gPairsInNative + = BufferUtils::map( data->m_device, pairs ); + Buffer* gBodyInNative + = BufferUtils::map( data->m_device, bodyBuf ); + Buffer* gContactOutNative + = BufferUtils::map( data->m_device, contactOut ); // this might not be empty + + Buffer constBuffer( device, 1, BufferBase::BUFFER_CONST ); + + ConstData cdata; + cdata.m_nPairs = nPairs; + cdata.m_collisionMargin = cfg.m_collisionMargin; + cdata.m_capacity = contactOut->getSize() - nContacts; + + u32 n = nContacts; + 
data->m_counterBuffer->write( &n, 1 ); +// DeviceUtils::waitForCompletion( device ); + + { + BufferInfo bInfo[] = { BufferInfo( gPairsInNative, true ), BufferInfo( shapeBuffer ), BufferInfo( gBodyInNative ), + BufferInfo( gContactOutNative ), + BufferInfo( data->m_counterBuffer ) }; + Launcher launcher( data->m_device, data->m_narrowphaseWithPlaneKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( nPairs*64, 64 ); + } + + data->m_counterBuffer->read( &n, 1 ); + DeviceUtils::waitForCompletion( device ); + + BufferUtils::unmap( gPairsInNative, pairs ); + BufferUtils::unmap( gBodyInNative, bodyBuf ); + BufferUtils::unmap( gContactOutNative, contactOut ); + + nContacts = min2((int)n, contactOut->getSize() ); +} + +// Run SupportCullingKernel +template +//template +int ChNarrowphase::culling( Data* data, const Buffer* pairs, int nPairs, const Buffer* bodyBuf, + const ShapeDataType shapeBuf, const Buffer* pairsOut, const Config& cfg ) +{ + if( nPairs == 0 ) return 0; + + Buffer* shapeBuffer = (Buffer*)shapeBuf; + ADLASSERT( shapeBuffer->getType() == TYPE ); + + const Device* device = data->m_device; + + Buffer* gPairsInNative + = BufferUtils::map( data->m_device, pairs ); + Buffer* gBodyInNative + = BufferUtils::map( data->m_device, bodyBuf ); + Buffer* gPairsOutNative + = BufferUtils::map( data->m_device, pairsOut ); + + // + Buffer constBuffer( device, 1, BufferBase::BUFFER_CONST ); + + ConstData cdata; + cdata.m_nPairs = nPairs; + cdata.m_collisionMargin = cfg.m_collisionMargin; + cdata.m_capacity = pairsOut->getSize(); + + u32 n = 0; + data->m_counterBuffer->write( &n, 1 ); +// DeviceUtils::waitForCompletion( device ); + { + BufferInfo bInfo[] = { BufferInfo( gPairsInNative, true ), BufferInfo( shapeBuffer ), BufferInfo( gBodyInNative ), + BufferInfo( gPairsOutNative ), BufferInfo( data->m_counterBuffer ) }; + Launcher launcher( data->m_device, 
data->m_supportCullingKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( nPairs, 64 ); + } + data->m_counterBuffer->read( &n, 1 ); + DeviceUtils::waitForCompletion( device ); +/* + if( gPairsInNative != pairs ) delete gPairsInNative; + if( gBodyInNative != bodyBuf ) delete gBodyInNative; + if( gPairsOutNative != pairsOut ) + { + gPairsOutNative->read( pairsOut->m_ptr, n ); + DeviceUtils::waitForCompletion( device ); + delete gPairsOutNative; + } +*/ + BufferUtils::unmap( gPairsInNative, pairs ); + BufferUtils::unmap( gBodyInNative, bodyBuf ); + BufferUtils::unmap( gPairsOutNative, pairsOut ); + + return min2((int)n, pairsOut->getSize() ); +} + +#undef PATH +#undef KERNEL0 +#undef KERNEL1 \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowphaseKernels.cl b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowphaseKernels.cl new file mode 100644 index 000000000..af177a836 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowphaseKernels.cl @@ -0,0 +1,1629 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable + +#ifdef cl_ext_atomic_counters_32 +#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable +#else +#define counter32_t volatile global int* +#endif + + +typedef unsigned int u32; +typedef unsigned short u16; +typedef unsigned char u8; + +#define GET_GROUP_IDX get_group_id(0) +#define GET_LOCAL_IDX get_local_id(0) +#define GET_GLOBAL_IDX get_global_id(0) +#define GET_GROUP_SIZE get_local_size(0) +#define GET_NUM_GROUPS get_num_groups(0) +#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) +#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) +#define AtomInc(x) atom_inc(&(x)) +#define AtomInc1(x, out) out = atom_inc(&(x)) +#define AppendInc(x, out) out = atomic_inc(x) +#define AtomAdd(x, value) atom_add(&(x), value) +#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) +#define AtomXhg(x, value) atom_xchg ( &(x), value ) + + +#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) + +#define make_float4 (float4) +#define make_float2 (float2) +#define make_uint4 (uint4) +#define make_int4 (int4) +#define make_uint2 (uint2) +#define make_int2 (int2) + + +#define max2 max +#define min2 min + + +/////////////////////////////////////// +// Vector +/////////////////////////////////////// +__inline +float fastDiv(float numerator, float denominator) +{ + return native_divide(numerator, denominator); +// return numerator/denominator; +} + +__inline +float4 fastDiv4(float4 numerator, float4 denominator) +{ + return native_divide(numerator, denominator); +} + +__inline +float fastSqrtf(float f2) +{ + 
return native_sqrt(f2); +// return sqrt(f2); +} + +__inline +float fastRSqrt(float f2) +{ + return native_rsqrt(f2); +} + +__inline +float fastLength4(float4 v) +{ + return fast_length(v); +} + +__inline +float4 fastNormalize4(float4 v) +{ + return fast_normalize(v); +} + + +__inline +float sqrtf(float a) +{ +// return sqrt(a); + return native_sqrt(a); +} + +__inline +float4 cross3(float4 a, float4 b) +{ + return cross(a,b); +} + +__inline +float dot3F4(float4 a, float4 b) +{ + float4 a1 = make_float4(a.xyz,0.f); + float4 b1 = make_float4(b.xyz,0.f); + return dot(a1, b1); +} + +__inline +float length3(const float4 a) +{ + return sqrtf(dot3F4(a,a)); +} + +__inline +float dot4(const float4 a, const float4 b) +{ + return dot( a, b ); +} + +// for height +__inline +float dot3w1(const float4 point, const float4 eqn) +{ + return dot3F4(point,eqn) + eqn.w; +} + +__inline +float4 normalize3(const float4 a) +{ + float4 n = make_float4(a.x, a.y, a.z, 0.f); + return fastNormalize4( n ); +// float length = sqrtf(dot3F4(a, a)); +// return 1.f/length * a; +} + +__inline +float4 normalize4(const float4 a) +{ + float length = sqrtf(dot4(a, a)); + return 1.f/length * a; +} + +__inline +float4 createEquation(const float4 a, const float4 b, const float4 c) +{ + float4 eqn; + float4 ab = b-a; + float4 ac = c-a; + eqn = normalize3( cross3(ab, ac) ); + eqn.w = -dot3F4(eqn,a); + return eqn; +} + +/////////////////////////////////////// +// Matrix3x3 +/////////////////////////////////////// + +typedef struct +{ + float4 m_row[3]; +}Matrix3x3; + +__inline +Matrix3x3 mtZero(); + +__inline +Matrix3x3 mtIdentity(); + +__inline +Matrix3x3 mtTranspose(Matrix3x3 m); + +__inline +Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b); + +__inline +float4 mtMul1(Matrix3x3 a, float4 b); + +__inline +float4 mtMul3(float4 a, Matrix3x3 b); + +__inline +Matrix3x3 mtZero() +{ + Matrix3x3 m; + m.m_row[0] = (float4)(0.f); + m.m_row[1] = (float4)(0.f); + m.m_row[2] = (float4)(0.f); + return m; +} + +__inline +Matrix3x3 
mtIdentity() +{ + Matrix3x3 m; + m.m_row[0] = (float4)(1,0,0,0); + m.m_row[1] = (float4)(0,1,0,0); + m.m_row[2] = (float4)(0,0,1,0); + return m; +} + +__inline +Matrix3x3 mtTranspose(Matrix3x3 m) +{ + Matrix3x3 out; + out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); + out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); + out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); + return out; +} + +__inline +Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) +{ + Matrix3x3 transB; + transB = mtTranspose( b ); + Matrix3x3 ans; + // why this doesn't run when 0ing in the for{} + a.m_row[0].w = 0.f; + a.m_row[1].w = 0.f; + a.m_row[2].w = 0.f; + for(int i=0; i<3; i++) + { +// a.m_row[i].w = 0.f; + ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); + ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); + ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); + ans.m_row[i].w = 0.f; + } + return ans; +} + +__inline +float4 mtMul1(Matrix3x3 a, float4 b) +{ + float4 ans; + ans.x = dot3F4( a.m_row[0], b ); + ans.y = dot3F4( a.m_row[1], b ); + ans.z = dot3F4( a.m_row[2], b ); + ans.w = 0.f; + return ans; +} + +__inline +float4 mtMul3(float4 a, Matrix3x3 b) +{ + float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); + float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); + float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); + + float4 ans; + ans.x = dot3F4( a, colx ); + ans.y = dot3F4( a, coly ); + ans.z = dot3F4( a, colz ); + return ans; +} + +/////////////////////////////////////// +// Quaternion +/////////////////////////////////////// + +typedef float4 Quaternion; + +__inline +Quaternion qtMul(Quaternion a, Quaternion b); + +__inline +Quaternion qtNormalize(Quaternion in); + +__inline +float4 qtRotate(Quaternion q, float4 vec); + +__inline +Quaternion qtInvert(Quaternion q); + +__inline +Matrix3x3 qtGetRotationMatrix(Quaternion q); + + + +__inline +Quaternion 
qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtNormalize(Quaternion in) +{ + return fastNormalize4(in); +// in /= length( in ); +// return in; +} +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} + +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + +__inline +Matrix3x3 qtGetRotationMatrix(Quaternion quat) +{ + float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f); + Matrix3x3 out; + + out.m_row[0].x=1-2*quat2.y-2*quat2.z; + out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z; + out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y; + out.m_row[0].w = 0.f; + + out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z; + out.m_row[1].y=1-2*quat2.x-2*quat2.z; + out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x; + out.m_row[1].w = 0.f; + + out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y; + out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x; + out.m_row[2].z=1-2*quat2.x-2*quat2.y; + out.m_row[2].w = 0.f; + + return out; +} + + +#define WG_SIZE 64 +#define HEIGHT_RES 4 +#define SHAPE_CONVEX_HEIGHT_FIELD 1//keep this in sync with AdlCollisionShape.h! 
+ +typedef struct +{ + float4 m_normal[HEIGHT_RES*HEIGHT_RES*6]; + u32 m_height4[HEIGHT_RES*HEIGHT_RES*6]; + u32 m_supportHeight4[HEIGHT_RES*HEIGHT_RES*6]; + + float m_scale; + float m_padding0; + float m_padding1; + float m_padding2; +} ShapeData; + +typedef struct +{ + u32 m_height4[HEIGHT_RES*HEIGHT_RES*6/4]; + + float m_scale; +} ShapeDeviceData; + +typedef struct +{ + float4 m_pos; + float4 m_quat; + float4 m_linVel; + float4 m_angVel; + + u32 m_shapeIdx; + u32 m_shapeType; + + float m_invMass; + float m_restituitionCoeff; + float m_frictionCoeff; +} BodyData; + +typedef struct +{ + float4 m_worldPos[4]; + float4 m_worldNormal; // w: m_nPoints +// float m_restituitionCoeff; +// float m_frictionCoeff; + u32 m_coeffs; + u32 m_batchIdx; +// int m_nPoints; +// int m_padding0; + + u32 m_bodyAPtr;//x:m_bodyAPtr, y:m_bodyBPtr + u32 m_bodyBPtr; +} Contact4; + +#define GET_NPOINTS(x) (x).m_worldNormal.w + + +typedef struct +{ + int m_nPairs; + float m_collisionMargin; + int m_capacity; + int m_paddings[1]; +} ConstBuffer; + +__inline +float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( *orientation, *p ) + (*translation); +} + +__inline +float4 invTransform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( qtInvert( *orientation ), (*p)-(*translation) ); // use qtInvRotate +} + +void CubeMapUtilsCalcCrd(const float4 p, int* faceIdxOut, float* x, float* y) +{ + { + int idx; + float r2[] = {p.x*p.x, p.y*p.y, p.z*p.z}; + + if (r2[1]>r2[0]) + { + if (r2[2]>r2[1]) + { + idx = 2; + + } else + { + idx = 1; + } + + } else + { + if (r2[2]>r2[0]) + { + idx = 2; + } else + { + idx = 0; + } + } + + *faceIdxOut = (idx*2); +//== + float4 abs = make_float4( fabs(p.x), fabs(p.y), fabs(p.z), 0.f ); + + float d; + if( idx == 0 ) + { + *x = p.y; + *y = p.z; + d = abs.x; + *faceIdxOut += (p.x < 0.f)? 
0: 1.f; + } + else if( idx == 1 ) + { + *x = p.z; + *y = p.x; + d = abs.y; + *faceIdxOut += (p.y < 0.f)? 0: 1.f; + } + else + { + *x = p.x; + *y = p.y; + d = abs.z; + *faceIdxOut += (p.z < 0.f)? 0: 1.f; + } + + float dInv = (d==0.f)? 0.f: fastDiv(1.f,d); + *x = (*x*dInv+1.f)*0.5f; + *y = (*y*dInv+1.f)*0.5f; + } +} + +float4 CubeMapUtilsCalcVector(int faceIdx, float x, float y) +{ + int dir = faceIdx/2; + float z = (faceIdx%2 == 0)? -1.f:1.f; + + x = x*2.f-1.f; + y = y*2.f-1.f; + + if( dir == 0 ) + { + return make_float4(z, x, y, 0.f); + } + else if( dir == 1 ) + { + return make_float4(y,z,x, 0.f); + } + else + { + return make_float4(x,y,z, 0.f); + } +} + +typedef int Face; + +u32 sample(__local ShapeDeviceData* shape, int face, int x, int y) +{ + + int idx = HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES; + __local u8* height = (__local u8*)shape->m_height4; + return height[idx]; +} + +u32 sampleSupportGlobal(__global ShapeData* shape, int face, int x, int y) +{ + + int idx = HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES; + __global u8* height = (__global u8*)shape->m_supportHeight4; + return height[idx]; +} + +float4 sampleNormal(__local ShapeData* shape, int face, int x, int y) +{ + return shape->m_normal[HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES]; +} + +float4 sampleNormalGlobal(const __global ShapeData* shape, int face, int x, int y) +{ + return shape->m_normal[HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES]; +} + +float4 ShapeDataCalcSamplePoint( __local const ShapeDeviceData* shape, int sIdx )//u8 height, int sIdx, float scale ) +{ + const float oneOver255 = 1.f/255.f; + + int faceIdx = fastDiv(sIdx,(HEIGHT_RES*HEIGHT_RES)); + int r = (sIdx%(HEIGHT_RES*HEIGHT_RES)); + int i = r/HEIGHT_RES; + int j = r%HEIGHT_RES; + + float4 v; + float x = fastDiv((i+0.5f),(float)HEIGHT_RES); + float y = fastDiv((j+0.5f),(float)HEIGHT_RES); + v = CubeMapUtilsCalcVector(faceIdx, x, y); + v = normalize3( v ); + + int quantizedHeight = sample( shape, faceIdx, i, j ); + float 
rheight = quantizedHeight*oneOver255*shape->m_scale; + return rheight*v; +} + +float ShapeDataQueryDistance(__local const ShapeDeviceData* shape, float4 p ) +{ + if( dot3F4( p, p ) >= shape->m_scale*shape->m_scale ) return FLT_MAX; + + const float oneOver255 = 1.f/255.f; + + int faceIdx; + float x, y; + CubeMapUtilsCalcCrd( p, &faceIdx, &x, &y ); + x = (x*HEIGHT_RES) - 0.5f; + y = (y*HEIGHT_RES) - 0.5f; + + float height; + { + int xi = (int)(x); + int yi = (int)(y); + float dx = x-xi; + float dy = y-yi; + + { + int xip = min2((int)(HEIGHT_RES-1), xi+1); + int yip = min2((int)(HEIGHT_RES-1), yi+1); + + u32 xy = sample( shape, faceIdx, xi, yi ); + u32 xpy = sample( shape, faceIdx, xip, yi ); + u32 xpyp = sample( shape, faceIdx, xip, yip ); + u32 xyp = sample( shape, faceIdx, xi, yip ); + + height = (xy*(1.f-dx)+xpy*dx)*(1.f-dy) + (xyp*(1.f-dx)+xpyp*dx)*dy; + height = height*oneOver255*shape->m_scale; + + p.w = 0.f; + + height = fastLength4( p ) - height; + } + } + + return height; +} + +float ShapeDataQuerySupportHeight(__global ShapeData* shape, float4 p ) +{ + int faceIdx; + float x, y; + CubeMapUtilsCalcCrd( p, &faceIdx, &x, &y ); + x = (x*HEIGHT_RES) - 0.5f; + y = (y*HEIGHT_RES) - 0.5f; + + float height; + { + int xi = (int)(x); + int yi = (int)(y); + + { + int xip = min2((int)(HEIGHT_RES-1), xi+1); + int yip = min2((int)(HEIGHT_RES-1), yi+1); + + u32 xy = sampleSupportGlobal( shape, faceIdx, xi, yi ); + u32 xpy = sampleSupportGlobal( shape, faceIdx, xip, yi ); + u32 xpyp = sampleSupportGlobal( shape, faceIdx, xip, yip ); + u32 xyp = sampleSupportGlobal( shape, faceIdx, xi, yip ); + + height = max2( xy, max2( xpy, max2( xpyp, xyp ) ) ); + height = height/255.f*shape->m_scale; + } + } + + return height; + +} + +float4 ShapeDataQueryNormal(__global const ShapeData* shape, float4 p ) +{ + int faceIdx; + float x, y; + CubeMapUtilsCalcCrd( p, &faceIdx, &x, &y ); + x = (x*HEIGHT_RES) - 0.5f; + y = (y*HEIGHT_RES) - 0.5f; + + float4 normalOut; + { + int xi = (int)(x); + 
int yi = (int)(y); + + normalOut = sampleNormalGlobal( shape, faceIdx, xi, yi ); + } + return normalOut; +} + + + +// kernels + + +__kernel +__attribute__((reqd_work_group_size(WG_SIZE,1,1))) +void SupportCullingKernel( __global int2* restrict gPairsIn, __global ShapeData* gShapes, + __global BodyData* gBodies, + __global int2* gPairsOut, + counter32_t gNPairs, + ConstBuffer cb ) +{ + int gIdx = GET_GLOBAL_IDX; + if( gIdx >= cb.m_nPairs ) return; + + const float collisionMargin = cb.m_collisionMargin; + const int capacity = cb.m_capacity; + + int2 pair = gPairsIn[gIdx]; + BodyData bodyA = gBodies[pair.x]; + BodyData bodyB = gBodies[pair.y]; + int shapeAIdx = bodyA.m_shapeIdx; + int shapeBIdx = bodyB.m_shapeIdx; + + + bool collide = false; + + //only collide if one of the two bodies has a non-zero mass + if (bodyA.m_invMass==0.f && bodyB.m_invMass==0.f) + return; + + + if (bodyA.m_shapeType == SHAPE_CONVEX_HEIGHT_FIELD && bodyB.m_shapeType==SHAPE_CONVEX_HEIGHT_FIELD) + { + float4 abInA, baInB; + float4 ab = bodyB.m_pos - bodyA.m_pos; + { + abInA = qtInvRotate( bodyA.m_quat, ab ); + baInB = qtInvRotate( bodyB.m_quat, -ab ); + } + float hA = ShapeDataQuerySupportHeight( gShapes+shapeAIdx, abInA ); + float hB = ShapeDataQuerySupportHeight( gShapes+shapeBIdx, baInB ); + + float h2 = dot3F4( ab, ab ); + + collide = ( hA + hB + collisionMargin > sqrtf(h2) ); + } + + if( collide ) + { + int dstIdx; + AppendInc( gNPairs, dstIdx ); + if( dstIdx < capacity ) + gPairsOut[dstIdx] = pair; + } +} + + +#define PARALLEL_DO(execution, n) for(int ie=0; ie h[lIdx+1].y)? h[lIdx]: h[lIdx+1];\ + mem_fence( CLK_LOCAL_MEM_FENCE );\ + h[lIdx] = (h[lIdx].y > h[lIdx+2].y)? h[lIdx]: h[lIdx+2];\ + mem_fence( CLK_LOCAL_MEM_FENCE );\ + h[lIdx] = (h[lIdx].y > h[lIdx+4].y)? h[lIdx]: h[lIdx+4];\ + mem_fence( CLK_LOCAL_MEM_FENCE );\ + h[lIdx] = (h[lIdx].y > h[lIdx+8].y)? h[lIdx]: h[lIdx+8];\ + mem_fence( CLK_LOCAL_MEM_FENCE );\ + h[lIdx] = (h[lIdx].y > h[lIdx+16].y)? 
h[lIdx]: h[lIdx+16];\ + }} + +#define PARALLEL_REDUCE32(h) \ + {int lIdx = GET_LOCAL_IDX;\ + if( lIdx < 32 )\ + {\ + h[lIdx] += h[lIdx+1];\ + mem_fence( CLK_LOCAL_MEM_FENCE );\ + h[lIdx] += h[lIdx+2];\ + mem_fence( CLK_LOCAL_MEM_FENCE );\ + h[lIdx] += h[lIdx+4];\ + mem_fence( CLK_LOCAL_MEM_FENCE );\ + h[lIdx] += h[lIdx+8];\ + mem_fence( CLK_LOCAL_MEM_FENCE );\ + h[lIdx] += h[lIdx+16];\ + }} + + +float4 extractManifold(__local float4* p, __local float4* h, __local int* nPointsPtr, float4 nearNormal) +{ + int nPoints = *nPointsPtr; + float4 center = make_float4(0,0,0,0); + { // calculate center + nPoints = min2( nPoints, 32 ); + { + int lIdx = GET_LOCAL_IDX; + h[lIdx] = p[lIdx]; + h[lIdx] = (lIdx= nPoints ) a[ie] = make_int4(-0xfffffff, -0xfffffff, -0xfffffff, -0xfffffff); + } + } + + GROUP_LDS_BARRIER; + + { // vector reduce, h[64] + int lIdx = GET_LOCAL_IDX; + if( lIdx < 32 ) + { + h[lIdx] = max2( h[lIdx], h[lIdx+1] ); + mem_fence( CLK_LOCAL_MEM_FENCE ); + h[lIdx] = max2( h[lIdx], h[lIdx+2] ); + mem_fence( CLK_LOCAL_MEM_FENCE ); + h[lIdx] = max2( h[lIdx], h[lIdx+4] ); + mem_fence( CLK_LOCAL_MEM_FENCE ); + h[lIdx] = max2( h[lIdx], h[lIdx+8] ); + mem_fence( CLK_LOCAL_MEM_FENCE ); + h[lIdx] = max2( h[lIdx], h[lIdx+16] ); + } + } + + GROUP_LDS_BARRIER; + } + { + { // set to idx + idx[0] = (int)a[0].x & 0xff; + idx[1] = (int)a[0].y & 0xff; + idx[2] = (int)a[0].z & 0xff; + idx[3] = (int)a[0].w & 0xff; + } + + GROUP_LDS_BARRIER; + float4 selection; + if( GET_LOCAL_IDX < 4 ) selection = p[idx[GET_LOCAL_IDX]]; + + GROUP_LDS_BARRIER; + if( GET_LOCAL_IDX < 4 ) p[GET_LOCAL_IDX] = selection; + } + + + return center; +} + +void extractManifold1(__local float4* p, __local float4* h, __local int* nPointsPtr, float4 center) +{ + __local int* a = (__local int*)h; + { + GROUP_LDS_BARRIER; + float4 selection; + if( GET_LOCAL_IDX < 4 ) + { + int idx = (int)a[GET_LOCAL_IDX] & 0xff; + selection = p[idx]; + } + + GROUP_LDS_BARRIER; + if( GET_LOCAL_IDX < 4 ) p[GET_LOCAL_IDX] = selection; + 
} + +} + +void extractManifold2( __local float4* p0, __local int* nPointsPtr0, float4 nearNormal0, + __local float4* p1, __local int* nPointsPtr1, float4 nearNormal1, + __local float4* h, float4 centerOut[2]) +{ + + int nPoints[2]; + nPoints[0] = *nPointsPtr0; + nPoints[1] = *nPointsPtr1; + float4 center[2]; + center[0] = make_float4(0,0,0,0); + center[1] = make_float4(0,0,0,0); + { // calculate center + nPoints[0] = min2( nPoints[0], 32 ); + nPoints[1] = min2( nPoints[1], 32 ); + { + int lIdx = GET_LOCAL_IDX; + h[lIdx] = (lIdx= nPoints[setIdx] ) a[ie + setIdx*64] = make_int4(-0xfffffff, -0xfffffff, -0xfffffff, -0xfffffff); + + a[ie + 32] = make_int4(-0xfffffff, -0xfffffff, -0xfffffff, -0xfffffff); + } + } + } + GROUP_LDS_BARRIER; + + { // vector reduce, h[64] + int bIdx = GET_LOCAL_IDX/32; + int eIdx = GET_LOCAL_IDX%32; + int lIdx = eIdx + bIdx*64; + { + h[lIdx] = max2( h[lIdx], h[lIdx+1] ); + mem_fence( CLK_LOCAL_MEM_FENCE ); + h[lIdx] = max2( h[lIdx], h[lIdx+2] ); + mem_fence( CLK_LOCAL_MEM_FENCE ); + h[lIdx] = max2( h[lIdx], h[lIdx+4] ); + mem_fence( CLK_LOCAL_MEM_FENCE ); + h[lIdx] = max2( h[lIdx], h[lIdx+8] ); + mem_fence( CLK_LOCAL_MEM_FENCE ); + h[lIdx] = max2( h[lIdx], h[lIdx+16] ); + } + } + + GROUP_LDS_BARRIER; + } + __local int* a = (__local int*)h; + { + GROUP_LDS_BARRIER; + + float4 selection; + + int bIdx = GET_LOCAL_IDX/32; + int eIdx = GET_LOCAL_IDX%32; + + if( eIdx < 4 ) + { + int idx = (int)a[eIdx+64*4*bIdx] & 0xff; + selection = p0[idx+32*bIdx]; + } + + GROUP_LDS_BARRIER; + if( eIdx < 4 ) p0[eIdx+32*bIdx] = selection; + } +} + +/* +1. Query Normal +2. Fill Normal +3. 
A->B, B->A +*/ + +void testVtx(__local BodyData* bodyAPtr, __local BodyData* bodyBPtr, + __local ShapeDeviceData* shapeAPtr, __local ShapeDeviceData* shapeBPtr, + __local int* lNContacts, __local float4* lCPoints) +{ + int pIdx = GET_LOCAL_IDX; + float4 bodyAPos = bodyAPtr->m_pos; + float4 bodyBPos = bodyBPtr->m_pos; + Quaternion bodyAQuat = bodyAPtr->m_quat; + Quaternion bodyBQuat = bodyBPtr->m_quat; + while( pIdx < HEIGHT_RES*HEIGHT_RES*6 ) + { + float4 pInB = ShapeDataCalcSamplePoint( shapeBPtr, pIdx ); + + float4 pInW = transform( &pInB, &bodyBPos, &bodyBQuat ); +// Aabb bodyAAabb = bodyAPtr->m_aabb; +// if( AabbOverlapsPoint( &bodyAAabb, pInW ) ) + { + float4 pInA = invTransform( &pInW, &bodyAPos, &bodyAQuat ); + + float dist = ShapeDataQueryDistance( shapeAPtr, pInA ); + if( dist < 0.010f ) + { + int dstIdx = atom_add( lNContacts, 1 ); + if( dstIdx < 32 ) + { + lCPoints[ dstIdx ] = make_float4( pInA.x, pInA.y, pInA.z, dist ); + } + } + } + + pIdx += GET_GROUP_SIZE; + } +} + +void testVtx2(__local const BodyData* bodyA, __local const BodyData* bodyB, + __local const ShapeDeviceData* shapeA, __local const ShapeDeviceData* shapeB, + __local int* lNContactsA, __local float4* lCPointsA, + __local int* lNContactsB, __local float4* lCPointsB, float collisionMargin ) +{ + int pIdx = GET_LOCAL_IDX; + + while( pIdx < HEIGHT_RES*HEIGHT_RES*6*2 ) + { + __local const BodyData* bodyAPtr =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?bodyA:bodyB; + __local const BodyData* bodyBPtr =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?bodyB:bodyA; + __local const ShapeDeviceData* shapeAPtr =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?shapeA:shapeB; + __local const ShapeDeviceData* shapeBPtr =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?shapeB:shapeA; + __local int* lNContacts =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?lNContactsA:lNContactsB; + __local float4* lCPoints =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?lCPointsA:lCPointsB; + + float4 bodyAPos = bodyAPtr->m_pos; + float4 bodyBPos = bodyBPtr->m_pos; + Quaternion bodyAQuat = 
bodyAPtr->m_quat; + Quaternion bodyBQuat = bodyBPtr->m_quat; + + float4 pInB = ShapeDataCalcSamplePoint( shapeBPtr, pIdx%(HEIGHT_RES*HEIGHT_RES*6) ); + + float4 pInW = transform( &pInB, &bodyBPos, &bodyBQuat ); +// Aabb bodyAAabb = bodyAPtr->m_aabb; +// if( AabbOverlapsPoint( &bodyAAabb, pInW ) ) + { + float4 pInA = invTransform( &pInW, &bodyAPos, &bodyAQuat ); + + float dist = ShapeDataQueryDistance( shapeAPtr, pInA ); + if( dist < collisionMargin ) + { + int dstIdx = atom_add( lNContacts, 1 ); + if( dstIdx < 32 ) + { + lCPoints[ dstIdx ] = make_float4( pInA.x, pInA.y, pInA.z, dist ); + } + } + } + + pIdx += GET_GROUP_SIZE; + } +} + +void testVtxWithPlane(__local BodyData* bodyA, __local BodyData* bodyB, + float4 nA, __local ShapeDeviceData* shapeB, + __local int* lNContactsA, __local float4* lCPointsA, float collisionMargin) +{ + int pIdx = GET_LOCAL_IDX; + + while( pIdx < HEIGHT_RES*HEIGHT_RES*6 ) + { + __local BodyData* bodyAPtr =bodyA; + __local BodyData* bodyBPtr =bodyB; + __local ShapeDeviceData* shapeBPtr =shapeB; + __local int* lNContacts =lNContactsA; + __local float4* lCPoints =lCPointsA; + + float4 bodyAPos = bodyAPtr->m_pos; + float4 bodyBPos = bodyBPtr->m_pos; + Quaternion bodyAQuat = bodyAPtr->m_quat; + Quaternion bodyBQuat = bodyBPtr->m_quat; + + float4 pInB = ShapeDataCalcSamplePoint( shapeBPtr, pIdx%(HEIGHT_RES*HEIGHT_RES*6) ); + + float4 pInW = transform( &pInB, &bodyBPos, &bodyBQuat ); + { + float4 pInA = invTransform( &pInW, &bodyAPos, &bodyAQuat ); + + float dist = dot3w1( pInA, nA );//ShapeDataQueryDistance( shapeAPtr, pInA ); + if( dist < collisionMargin ) + { + int dstIdx = atom_add( lNContacts, 1 ); + if( dstIdx < 32 ) + { + lCPoints[ dstIdx ] = make_float4( pInA.x, pInA.y, pInA.z, dist ); + } + } + } + + pIdx += GET_GROUP_SIZE; + } +} + +#define GET_SHAPE_IDX(x) (int)((x).m_shapeIdx) + +void output(__local BodyData* bodyAPtr, __local BodyData* bodyBPtr, + __local int2* iPair, + __local int* lNContacts, __local float4* lCPoints, + float4 
center, + __global ShapeData* shapeData, __global Contact4* contactsOut, float collisionMargin) +{ + if( *lNContacts != 0 ) + { + int nContacts = min2( *lNContacts, 4 ); + + __global Contact4* c = contactsOut; + + if( GET_LOCAL_IDX < nContacts ) + { + int i = GET_LOCAL_IDX; + float4 p = lCPoints[i]; + float4 bodyAPos = bodyAPtr->m_pos; + Quaternion bodyAQuat = bodyAPtr->m_quat; + + c->m_worldPos[i] = transform( &p, &bodyAPos, &bodyAQuat ); + c->m_worldPos[i].w = lCPoints[i].w - collisionMargin; + } + + if( GET_LOCAL_IDX == 0 ) + { + float4 contactNormal; + contactNormal = ShapeDataQueryNormal( &shapeData[GET_SHAPE_IDX(*bodyAPtr)], center ); + contactNormal = normalize3( qtRotate( bodyAPtr->m_quat, contactNormal ) ); + + c->m_worldNormal = contactNormal; +// c->m_restituitionCoeff = 0.f; +// c->m_frictionCoeff = 0.7f; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + GET_NPOINTS(*c) = nContacts; + c->m_bodyAPtr = iPair[0].x; + c->m_bodyBPtr = iPair[0].y; + } + } + else + { + if( GET_LOCAL_IDX == 0 ) + GET_NPOINTS(contactsOut[0]) = 0; + } +} + +// todo. 
make it better +void output2(__local BodyData* bodyAPtr, __local BodyData* bodyBPtr, + int pair0, int pair1, + __local int* lNContacts, __local float4* lCPoints, + float4 center, + const __global ShapeData* shapeData, __global Contact4* contactsOut, counter32_t nContactsOut, int capacity, + float collisionMargin ) +{ + int lIdx = GET_LOCAL_IDX%32; + int nContacts = min2( *lNContacts, 4 ); + + GROUP_LDS_BARRIER; + + if( lIdx == 0 && nContacts) + { + int dstIdx; + AppendInc( nContactsOut, dstIdx ); + *lNContacts = dstIdx; + + if( dstIdx >= capacity ) + *lNContacts = -1; + } + + GROUP_LDS_BARRIER; + + bool canWrite = (*lNContacts!=-1); + + if( nContacts && canWrite ) + { + __global Contact4* c = contactsOut + (*lNContacts); + + if( lIdx < nContacts ) + { + int i = lIdx; + float4 p = lCPoints[i]; + float4 bodyAPos = bodyAPtr->m_pos; + Quaternion bodyAQuat = bodyAPtr->m_quat; + + p = transform( &p, &bodyAPos, &bodyAQuat ); + p.w = lCPoints[i].w - collisionMargin; + c->m_worldPos[i] = p; + } + + if( lIdx == 0 ) + { + if( nContacts ) + { + float4 contactNormal; + contactNormal = ShapeDataQueryNormal( &shapeData[GET_SHAPE_IDX(*bodyAPtr)], center ); + contactNormal = normalize3( qtRotate( bodyAPtr->m_quat, contactNormal ) ); + + c->m_worldNormal = contactNormal; +// c->m_restituitionCoeff = 0.f; +// c->m_frictionCoeff = 0.7f; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_bodyAPtr = pair0; + c->m_bodyBPtr = pair1; + } + GET_NPOINTS(*c) = nContacts; + } + } +} + +__inline +void output2LDS(__local BodyData* bodyAPtr, __local BodyData* bodyBPtr, + int pair0, int pair1, + int lNContacts, __local float4* lCPoints, + float4 center, + const __global ShapeData* shapeData, __local Contact4* contactsOut, + float collisionMargin ) +{ + int lIdx = GET_LOCAL_IDX%32; +// int lIdx = GET_LOCAL_IDX; +// int groupIdx = 0; + + int nContacts = min2( lNContacts, 4 ); + + GROUP_LDS_BARRIER; + + if( nContacts != 0 ) + { + if( lIdx < nContacts ) + { + int i = lIdx; + float4 p 
= lCPoints[i]; + float4 bodyAPos = bodyAPtr->m_pos; + Quaternion bodyAQuat = bodyAPtr->m_quat; + + p = transform( &p, &bodyAPos, &bodyAQuat ); + p.w = lCPoints[i].w - collisionMargin; + contactsOut->m_worldPos[i] = p; + } + } + + if( lIdx == 0 ) + { + if( nContacts != 0 ) + { + float4 contactNormal; + contactNormal = ShapeDataQueryNormal( &shapeData[GET_SHAPE_IDX(*bodyAPtr)], center ); + contactNormal = normalize3( qtRotate( bodyAPtr->m_quat, contactNormal ) ); + + contactsOut->m_worldNormal = contactNormal; +// contactsOut->m_worldNormal = make_float4(1.5f,1.4f,1.3f,0.f); +// contactsOut->m_restituitionCoeff = 0.f; +// contactsOut->m_frictionCoeff = 0.7f; + contactsOut->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + contactsOut->m_bodyAPtr = pair0; + contactsOut->m_bodyBPtr = pair1; + } + GET_NPOINTS(*contactsOut) = nContacts;//nContacts; + } + +// contactsOut[groupIdx].m_worldNormal = make_float4(1.5f,1.4f,1.3f,0.f); +} + +void output2_1(__local BodyData* bodyAPtr, __local BodyData* bodyBPtr, + int pair0, int pair1, + __local int* lNContacts, __local float4* lCPoints, + float4 center, float4 nA, + const __global ShapeData* shapeData, __global Contact4* contactsOut, counter32_t nContactsOut, int capacity, float collisionMargin ) +{ + int lIdx = GET_LOCAL_IDX; + int nContacts = min2( *lNContacts, 4 ); + + GROUP_LDS_BARRIER; + + if( lIdx == 0 && nContacts) + { + int dstIdx; + AppendInc( nContactsOut, dstIdx ); + *lNContacts = dstIdx; + + if( dstIdx >= capacity ) + *lNContacts = -1; + } + + GROUP_LDS_BARRIER; + + bool canWrite = (*lNContacts!=-1); + + if( nContacts && canWrite ) + { + __global Contact4* c = contactsOut + (*lNContacts); + + if( lIdx < nContacts ) + { + int i = lIdx; + float4 p = lCPoints[i]; + float4 bodyAPos = bodyAPtr->m_pos; + Quaternion bodyAQuat = bodyAPtr->m_quat; + + p = transform( &p, &bodyAPos, &bodyAQuat ); + p.w = lCPoints[i].w - collisionMargin; + c->m_worldPos[i] = p; + } + + if( lIdx == 0 ) + { + if( nContacts ) + { + float4 
contactNormal; + contactNormal = nA;//ShapeDataQueryNormal( &shapeData[GET_SHAPE_IDX(*bodyAPtr)], center ); + contactNormal = normalize3( qtRotate( bodyAPtr->m_quat, contactNormal ) ); + + c->m_worldNormal = contactNormal; +// c->m_restituitionCoeff = 0.f; +// c->m_frictionCoeff = 0.7f; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_bodyAPtr = pair0; + c->m_bodyBPtr = pair1; + } + GET_NPOINTS(*c) = nContacts; + } + } +} + +__kernel +void manifold(__global float4* vIn, __global float4* vOut) +{ + __local float4 lCPoints[32]; + __local float4 lManifoldBuffer[64]; + __local int lNContacts; + __local float4 ab; + + if( GET_LOCAL_IDX<32 ) + { + lCPoints[GET_LOCAL_IDX] = vIn[GET_GLOBAL_IDX]; + } + + if( GET_LOCAL_IDX == 0 ) + { + lNContacts = 32; + ab = vIn[GET_GLOBAL_IDX]; + } + + GROUP_LDS_BARRIER; + + float4 center = extractManifold( lCPoints, lManifoldBuffer, &lNContacts, ab ); + + if( GET_LOCAL_IDX < lNContacts ) + { + vOut[4*GET_GROUP_IDX+GET_LOCAL_IDX] = lCPoints[GET_LOCAL_IDX]; + } + +} + +//#define COMBINE_REDUCTION + +__kernel +__attribute__((reqd_work_group_size(64, 1, 1))) +void NarrowphaseKernel( const __global int2* restrict pairs, const __global ShapeData* shapeData, const __global BodyData* restrict bodyDatas, + __global Contact4* restrict contactsOut, + counter32_t nContactsOut, ConstBuffer cb ) +{ + // 2.5K LDS + __local Contact4 ldsContacts[2]; + __local BodyData bodyA; + __local BodyData bodyB; + __local ShapeDeviceData shapeA; + __local ShapeDeviceData shapeB; + __local float4 lCPointsA[32*2]; + __local int lNContactsA; + __local float4* lCPointsB = lCPointsA+32; + __local int lNContactsB; +#ifdef COMBINE_REDUCTION + __local float4 lManifoldBuffer[64*2]; +#else + __local float4 lManifoldBuffer[64]; +#endif + __local int2 iPairAB; + + const int capacity = cb.m_capacity; + const float collisionMargin = cb.m_collisionMargin; + + + int pairIdx = GET_GROUP_IDX; +// for(int pairIdx = GET_GROUP_IDX; pairIdxm_height4[idx] = shapeData[ 
myShapeIdx ].m_height4[idx]; + + idx+=32; + } + } + + GROUP_LDS_BARRIER; + + testVtx2( &bodyA, &bodyB, &shapeA, &shapeB, &lNContactsA, lCPointsA, &lNContactsB, lCPointsB, collisionMargin ); + + GROUP_LDS_BARRIER; + + float4 ab = bodyB.m_pos - bodyA.m_pos; + float4 center[2]; + + if( lNContactsA != 0 || lNContactsB != 0 ) + { + float4 abInA; + abInA = qtInvRotate( bodyA.m_quat, ab ); + + float4 abInB; + abInB = qtInvRotate( bodyB.m_quat, ab ); + +#ifdef COMBINE_REDUCTION + extractManifold2( lCPointsA, &lNContactsA, abInA, + lCPointsB, &lNContactsB, abInB, + lManifoldBuffer, center ); +#else + if( lNContactsA != 0 ) + center[0] = extractManifold( lCPointsA, lManifoldBuffer, &lNContactsA, abInA ); + if( lNContactsB != 0 ) + center[1] = extractManifold( lCPointsB, lManifoldBuffer, &lNContactsB, abInB ); +#endif + } + + int firstSet = GET_LOCAL_IDX/32; + +/* + if( GET_LOCAL_IDX == 0 ) // for debug + { + ldsContacts[0].m_worldNormal = make_float4(-1,-1,-1,0); + ldsContacts[0].m_bodyAPtr = 0; + ldsContacts[0].m_bodyBPtr = 0; + ldsContacts[0].m_batchIdx = 111; + ldsContacts[1].m_worldNormal = make_float4(-1,-1,-1,0); + ldsContacts[1].m_bodyAPtr = 0; + ldsContacts[1].m_bodyBPtr = 0; + ldsContacts[1].m_batchIdx = 111; + } +*/ + bool doReduction = true; + if( doReduction ) + { + GROUP_LDS_BARRIER; + + output2LDS( (firstSet)?&bodyA: &bodyB, (firstSet)?&bodyB : &bodyA, + (firstSet)?iPairAB.x : iPairAB.y, (firstSet)?iPairAB.y : iPairAB.x, + (firstSet)?lNContactsA : lNContactsB, (firstSet)?lCPointsA:lCPointsB, + (firstSet)?center[0] : center[1], shapeData, (firstSet)?&ldsContacts[0]: &ldsContacts[1], collisionMargin ); + + GROUP_LDS_BARRIER; + + if( GET_LOCAL_IDX == 0 ) + { + if( lNContactsA && lNContactsB ) + { + float nDotn = dot3F4( ldsContacts[0].m_worldNormal, ldsContacts[1].m_worldNormal ); + if( nDotn < -(1.f-0.01f) ) + { + if( ldsContacts[0].m_bodyAPtr > ldsContacts[1].m_bodyAPtr ) + lNContactsA = 0; + else + lNContactsB = 0; + } + } + } + + if( GET_LOCAL_IDX == 0 ) + { + 
int n = lNContactsA; + if( n != 0 ) + { + int dstIdx; + AppendInc( nContactsOut, dstIdx ); + if( dstIdx < capacity ) + { int idx = 0; + contactsOut[ dstIdx ] = ldsContacts[idx]; + contactsOut[ dstIdx].m_batchIdx = pairIdx; + } + } + + n = lNContactsB; + if( n != 0 ) + { + int dstIdx; + AppendInc( nContactsOut, dstIdx ); + if( dstIdx < capacity ) + { int idx = 1; + contactsOut[ dstIdx ] = ldsContacts[idx]; + contactsOut[ dstIdx].m_batchIdx = pairIdx; + } + } + } + + GROUP_LDS_BARRIER; + } + else + { + //output2( (firstSet)?&bodyA: &bodyB, (firstSet)?&bodyB : &bodyA, + // (firstSet)?iPairAB.x : iPairAB.y, (firstSet)?iPairAB.y : iPairAB.x, + // (firstSet)?&lNContactsA : &lNContactsB, (firstSet)?lCPointsA:lCPointsB, + // (firstSet)?center[0] : center[1], shapeData, contactsOut, nContactsOut, capacity, collisionMargin ); + } + } +} + + +__kernel +__attribute__((reqd_work_group_size(64, 1, 1))) +void NarrowphaseWithPlaneKernel( const __global int2* restrict pairs, const __global ShapeData* shapeData, const __global BodyData* restrict bodyDatas, + __global Contact4* restrict contactsOut, + counter32_t nContactsOut, ConstBuffer cb ) +{ + // 2.5K LDS + __local BodyData bodyA; + __local BodyData bodyB; + __local ShapeDeviceData shapeA; + __local ShapeDeviceData shapeB; + __local float4 lCPointsA[32*2]; + __local int lNContactsA; +// __local float4* lCPointsB = lCPointsA+32; +// __local int lNContactsB; + __local float4 lManifoldBuffer[64]; + __local int2 iPairAB; + + const int capacity = cb.m_capacity; + const float collisionMargin = cb.m_collisionMargin; + + int pairIdx = GET_GROUP_IDX; + { + if( GET_LOCAL_IDX == 0 ) // load Bodies + { + int2 pair = pairs[pairIdx]; + iPairAB = make_int2(pair.x, pair.y); + bodyA = bodyDatas[ pair.x ]; + bodyB = bodyDatas[ pair.y ]; + shapeA.m_scale = shapeData[ GET_SHAPE_IDX(bodyA) ].m_scale; + shapeB.m_scale = shapeData[ GET_SHAPE_IDX(bodyB) ].m_scale; + lNContactsA = 0; +// lNContactsB = 0; + } + + GROUP_LDS_BARRIER; + + if 
(bodyB.m_invMass == 0.f) + return; + + // todo. can check if the shape is the same to previous one. If same, dont read + { // load shape data + int idx = GET_LOCAL_IDX%32; + int bIdx = GET_LOCAL_IDX/32; + __local ShapeDeviceData* myShape = (bIdx==0)?&shapeA: &shapeB; + int myShapeIdx = (bIdx==0)?GET_SHAPE_IDX(bodyA): GET_SHAPE_IDX(bodyB); + + while( idx < HEIGHT_RES*HEIGHT_RES*6/4 ) + { + myShape->m_height4[idx] = shapeData[ myShapeIdx ].m_height4[idx]; + + idx+=32; + } + } + + GROUP_LDS_BARRIER; + + float4 nA = make_float4(0,1,0,0); + + +// testVtx2( &bodyA, &bodyB, &shapeA, &shapeB, &lNContactsA, lCPointsA, &lNContactsB, lCPointsB ); + testVtxWithPlane( &bodyA, &bodyB, nA, &shapeB, &lNContactsA, lCPointsA, collisionMargin ); + + GROUP_LDS_BARRIER; + +// float4 ab = bodyB.m_pos - bodyA.m_pos; + float4 center[2]; + + if( lNContactsA != 0 ) + { + float4 abInA; + abInA = nA;//qtInvRotate( bodyA.m_quat, ab ); + + if( lNContactsA != 0 ) + center[0] = extractManifold( lCPointsA, lManifoldBuffer, &lNContactsA, abInA ); + } + +// int firstSet = GET_LOCAL_IDX/32; + + output2_1( &bodyA, &bodyB, + iPairAB.x, iPairAB.y, + &lNContactsA, lCPointsA, + center[0], nA, shapeData, contactsOut, nContactsOut, capacity, collisionMargin ); + } +} \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowphaseKernels.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowphaseKernels.h new file mode 100644 index 000000000..ff846cb60 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/ChNarrowphaseKernels.h @@ -0,0 +1,1616 @@ +static const char* narrowphaseKernelsCL= \ +"\n" +"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : 
enable\n" +"\n" +"#ifdef cl_ext_atomic_counters_32\n" +"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" +"#else\n" +"#define counter32_t volatile global int*\n" +"#endif\n" +"\n" +"\n" +"typedef unsigned int u32;\n" +"typedef unsigned short u16;\n" +"typedef unsigned char u8;\n" +"\n" +"#define GET_GROUP_IDX get_group_id(0)\n" +"#define GET_LOCAL_IDX get_local_id(0)\n" +"#define GET_GLOBAL_IDX get_global_id(0)\n" +"#define GET_GROUP_SIZE get_local_size(0)\n" +"#define GET_NUM_GROUPS get_num_groups(0)\n" +"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +"#define AtomInc(x) atom_inc(&(x))\n" +"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +"#define AppendInc(x, out) out = atomic_inc(x)\n" +"#define AtomAdd(x, value) atom_add(&(x), value)\n" +"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" +"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" +"\n" +"\n" +"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" +"\n" +"#define make_float4 (float4)\n" +"#define make_float2 (float2)\n" +"#define make_uint4 (uint4)\n" +"#define make_int4 (int4)\n" +"#define make_uint2 (uint2)\n" +"#define make_int2 (int2)\n" +"\n" +"\n" +"#define max2 max\n" +"#define min2 min\n" +"\n" +"\n" +"///////////////////////////////////////\n" +"// Vector\n" +"///////////////////////////////////////\n" +"__inline\n" +"float fastDiv(float numerator, float denominator)\n" +"{\n" +" return native_divide(numerator, denominator); \n" +"// return numerator/denominator; \n" +"}\n" +"\n" +"__inline\n" +"float4 fastDiv4(float4 numerator, float4 denominator)\n" +"{\n" +" return native_divide(numerator, denominator); \n" +"}\n" +"\n" +"__inline\n" +"float fastSqrtf(float f2)\n" +"{\n" +" return native_sqrt(f2);\n" +"// return sqrt(f2);\n" +"}\n" +"\n" +"__inline\n" +"float fastRSqrt(float f2)\n" +"{\n" +" return native_rsqrt(f2);\n" +"}\n" +"\n" +"__inline\n" +"float 
fastLength4(float4 v)\n" +"{\n" +" return fast_length(v);\n" +"}\n" +"\n" +"__inline\n" +"float4 fastNormalize4(float4 v)\n" +"{\n" +" return fast_normalize(v);\n" +"}\n" +"\n" +"\n" +"__inline\n" +"float sqrtf(float a)\n" +"{\n" +"// return sqrt(a);\n" +" return native_sqrt(a);\n" +"}\n" +"\n" +"__inline\n" +"float4 cross3(float4 a, float4 b)\n" +"{\n" +" return cross(a,b);\n" +"}\n" +"\n" +"__inline\n" +"float dot3F4(float4 a, float4 b)\n" +"{\n" +" float4 a1 = make_float4(a.xyz,0.f);\n" +" float4 b1 = make_float4(b.xyz,0.f);\n" +" return dot(a1, b1);\n" +"}\n" +"\n" +"__inline\n" +"float length3(const float4 a)\n" +"{\n" +" return sqrtf(dot3F4(a,a));\n" +"}\n" +"\n" +"__inline\n" +"float dot4(const float4 a, const float4 b)\n" +"{\n" +" return dot( a, b );\n" +"}\n" +"\n" +"// for height\n" +"__inline\n" +"float dot3w1(const float4 point, const float4 eqn)\n" +"{\n" +" return dot3F4(point,eqn) + eqn.w;\n" +"}\n" +"\n" +"__inline\n" +"float4 normalize3(const float4 a)\n" +"{\n" +" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +" return fastNormalize4( n );\n" +"// float length = sqrtf(dot3F4(a, a));\n" +"// return 1.f/length * a;\n" +"}\n" +"\n" +"__inline\n" +"float4 normalize4(const float4 a)\n" +"{\n" +" float length = sqrtf(dot4(a, a));\n" +" return 1.f/length * a;\n" +"}\n" +"\n" +"__inline\n" +"float4 createEquation(const float4 a, const float4 b, const float4 c)\n" +"{\n" +" float4 eqn;\n" +" float4 ab = b-a;\n" +" float4 ac = c-a;\n" +" eqn = normalize3( cross3(ab, ac) );\n" +" eqn.w = -dot3F4(eqn,a);\n" +" return eqn;\n" +"}\n" +"\n" +"///////////////////////////////////////\n" +"// Matrix3x3\n" +"///////////////////////////////////////\n" +"\n" +"typedef struct\n" +"{\n" +" float4 m_row[3];\n" +"}Matrix3x3;\n" +"\n" +"__inline\n" +"Matrix3x3 mtZero();\n" +"\n" +"__inline\n" +"Matrix3x3 mtIdentity();\n" +"\n" +"__inline\n" +"Matrix3x3 mtTranspose(Matrix3x3 m);\n" +"\n" +"__inline\n" +"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" +"\n" +"__inline\n" 
+"float4 mtMul1(Matrix3x3 a, float4 b);\n" +"\n" +"__inline\n" +"float4 mtMul3(float4 a, Matrix3x3 b);\n" +"\n" +"__inline\n" +"Matrix3x3 mtZero()\n" +"{\n" +" Matrix3x3 m;\n" +" m.m_row[0] = (float4)(0.f);\n" +" m.m_row[1] = (float4)(0.f);\n" +" m.m_row[2] = (float4)(0.f);\n" +" return m;\n" +"}\n" +"\n" +"__inline\n" +"Matrix3x3 mtIdentity()\n" +"{\n" +" Matrix3x3 m;\n" +" m.m_row[0] = (float4)(1,0,0,0);\n" +" m.m_row[1] = (float4)(0,1,0,0);\n" +" m.m_row[2] = (float4)(0,0,1,0);\n" +" return m;\n" +"}\n" +"\n" +"__inline\n" +"Matrix3x3 mtTranspose(Matrix3x3 m)\n" +"{\n" +" Matrix3x3 out;\n" +" out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +" out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" return out;\n" +"}\n" +"\n" +"__inline\n" +"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" +"{\n" +" Matrix3x3 transB;\n" +" transB = mtTranspose( b );\n" +" Matrix3x3 ans;\n" +" // why this doesn't run when 0ing in the for{}\n" +" a.m_row[0].w = 0.f;\n" +" a.m_row[1].w = 0.f;\n" +" a.m_row[2].w = 0.f;\n" +" for(int i=0; i<3; i++)\n" +" {\n" +"// a.m_row[i].w = 0.f;\n" +" ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" +" ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" +" ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" +" ans.m_row[i].w = 0.f;\n" +" }\n" +" return ans;\n" +"}\n" +"\n" +"__inline\n" +"float4 mtMul1(Matrix3x3 a, float4 b)\n" +"{\n" +" float4 ans;\n" +" ans.x = dot3F4( a.m_row[0], b );\n" +" ans.y = dot3F4( a.m_row[1], b );\n" +" ans.z = dot3F4( a.m_row[2], b );\n" +" ans.w = 0.f;\n" +" return ans;\n" +"}\n" +"\n" +"__inline\n" +"float4 mtMul3(float4 a, Matrix3x3 b)\n" +"{\n" +" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" 
+"\n" +" float4 ans;\n" +" ans.x = dot3F4( a, colx );\n" +" ans.y = dot3F4( a, coly );\n" +" ans.z = dot3F4( a, colz );\n" +" return ans;\n" +"}\n" +"\n" +"///////////////////////////////////////\n" +"// Quaternion\n" +"///////////////////////////////////////\n" +"\n" +"typedef float4 Quaternion;\n" +"\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b);\n" +"\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in);\n" +"\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec);\n" +"\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q);\n" +"\n" +"__inline\n" +"Matrix3x3 qtGetRotationMatrix(Quaternion q);\n" +"\n" +"\n" +"\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b)\n" +"{\n" +" Quaternion ans;\n" +" ans = cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in)\n" +"{\n" +" return fastNormalize4(in);\n" +"// in /= length( in );\n" +"// return in;\n" +"}\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec)\n" +"{\n" +" Quaternion qInv = qtInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q)\n" +"{\n" +" return (Quaternion)(-q.xyz, q.w);\n" +"}\n" +"\n" +"__inline\n" +"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +"{\n" +" return qtRotate( qtInvert( q ), vec );\n" +"}\n" +"\n" +"__inline\n" +"Matrix3x3 qtGetRotationMatrix(Quaternion quat)\n" +"{\n" +" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" Matrix3x3 out;\n" +"\n" +" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +" out.m_row[0].w = 0.f;\n" +"\n" +" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +" 
out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +" out.m_row[1].w = 0.f;\n" +"\n" +" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +" out.m_row[2].w = 0.f;\n" +"\n" +" return out;\n" +"}\n" +"\n" +"\n" +"#define WG_SIZE 64\n" +"#define HEIGHT_RES 4\n" +"#define SHAPE_CONVEX_HEIGHT_FIELD 1//keep this in sync with AdlCollisionShape.h!\n" +"\n" +"typedef struct\n" +"{\n" +" float4 m_normal[HEIGHT_RES*HEIGHT_RES*6];\n" +" u32 m_height4[HEIGHT_RES*HEIGHT_RES*6];\n" +" u32 m_supportHeight4[HEIGHT_RES*HEIGHT_RES*6];\n" +"\n" +" float m_scale;\n" +" float m_padding0;\n" +" float m_padding1;\n" +" float m_padding2;\n" +"} ShapeData;\n" +"\n" +"typedef struct\n" +"{\n" +" u32 m_height4[HEIGHT_RES*HEIGHT_RES*6/4];\n" +"\n" +" float m_scale;\n" +"} ShapeDeviceData;\n" +"\n" +"typedef struct\n" +"{\n" +" float4 m_pos;\n" +" float4 m_quat;\n" +" float4 m_linVel;\n" +" float4 m_angVel;\n" +"\n" +" u32 m_shapeIdx;\n" +" u32 m_shapeType;\n" +" \n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"} BodyData;\n" +"\n" +"typedef struct\n" +"{\n" +" float4 m_worldPos[4];\n" +" float4 m_worldNormal; // w: m_nPoints\n" +"// float m_restituitionCoeff;\n" +"// float m_frictionCoeff;\n" +" u32 m_coeffs;\n" +" u32 m_batchIdx;\n" +"// int m_nPoints;\n" +"// int m_padding0;\n" +"\n" +" u32 m_bodyAPtr;//x:m_bodyAPtr, y:m_bodyBPtr\n" +" u32 m_bodyBPtr;\n" +"} Contact4;\n" +"\n" +"#define GET_NPOINTS(x) (x).m_worldNormal.w\n" +"\n" +"\n" +"typedef struct\n" +"{\n" +" int m_nPairs;\n" +" float m_collisionMargin;\n" +" int m_capacity;\n" +" int m_paddings[1];\n" +"} ConstBuffer;\n" +"\n" +"__inline\n" +"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +"{\n" +" return qtRotate( *orientation, *p ) + (*translation);\n" +"}\n" +"\n" +"__inline\n" +"float4 
invTransform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +"{\n" +" return qtRotate( qtInvert( *orientation ), (*p)-(*translation) ); // use qtInvRotate\n" +"}\n" +"\n" +"void CubeMapUtilsCalcCrd(const float4 p, int* faceIdxOut, float* x, float* y)\n" +"{\n" +" {\n" +" int idx;\n" +" float r2[] = {p.x*p.x, p.y*p.y, p.z*p.z};\n" +"\n" +" if (r2[1]>r2[0])\n" +" {\n" +" if (r2[2]>r2[1])\n" +" {\n" +" idx = 2;\n" +" \n" +" } else\n" +" {\n" +" idx = 1;\n" +" }\n" +" \n" +" } else\n" +" {\n" +" if (r2[2]>r2[0])\n" +" {\n" +" idx = 2;\n" +" } else\n" +" {\n" +" idx = 0;\n" +" }\n" +" }\n" +"\n" +" *faceIdxOut = (idx*2);\n" +"//==\n" +" float4 abs = make_float4( fabs(p.x), fabs(p.y), fabs(p.z), 0.f );\n" +"\n" +" float d;\n" +" if( idx == 0 )\n" +" {\n" +" *x = p.y;\n" +" *y = p.z;\n" +" d = abs.x;\n" +" *faceIdxOut += (p.x < 0.f)? 0: 1.f;\n" +" }\n" +" else if( idx == 1 )\n" +" {\n" +" *x = p.z;\n" +" *y = p.x;\n" +" d = abs.y;\n" +" *faceIdxOut += (p.y < 0.f)? 0: 1.f;\n" +" }\n" +" else\n" +" {\n" +" *x = p.x;\n" +" *y = p.y;\n" +" d = abs.z;\n" +" *faceIdxOut += (p.z < 0.f)? 0: 1.f;\n" +" }\n" +"\n" +" float dInv = (d==0.f)? 0.f: fastDiv(1.f,d);\n" +" *x = (*x*dInv+1.f)*0.5f;\n" +" *y = (*y*dInv+1.f)*0.5f;\n" +" }\n" +"}\n" +"\n" +"float4 CubeMapUtilsCalcVector(int faceIdx, float x, float y)\n" +"{\n" +" int dir = faceIdx/2;\n" +" float z = (faceIdx%2 == 0)? 
-1.f:1.f;\n" +"\n" +" x = x*2.f-1.f;\n" +" y = y*2.f-1.f;\n" +" \n" +" if( dir == 0 )\n" +" {\n" +" return make_float4(z, x, y, 0.f);\n" +" }\n" +" else if( dir == 1 )\n" +" {\n" +" return make_float4(y,z,x, 0.f);\n" +" }\n" +" else\n" +" {\n" +" return make_float4(x,y,z, 0.f);\n" +" }\n" +"}\n" +"\n" +"typedef int Face;\n" +"\n" +"u32 sample(__local ShapeDeviceData* shape, int face, int x, int y)\n" +"{\n" +"\n" +" int idx = HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES;\n" +" __local u8* height = (__local u8*)shape->m_height4;\n" +" return height[idx];\n" +"}\n" +"\n" +"u32 sampleSupportGlobal(__global ShapeData* shape, int face, int x, int y)\n" +"{\n" +"\n" +" int idx = HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES;\n" +" __global u8* height = (__global u8*)shape->m_supportHeight4;\n" +" return height[idx];\n" +"}\n" +"\n" +"float4 sampleNormal(__local ShapeData* shape, int face, int x, int y)\n" +"{\n" +" return shape->m_normal[HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES];\n" +"}\n" +"\n" +"float4 sampleNormalGlobal(const __global ShapeData* shape, int face, int x, int y)\n" +"{\n" +" return shape->m_normal[HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES];\n" +"}\n" +"\n" +"float4 ShapeDataCalcSamplePoint( __local const ShapeDeviceData* shape, int sIdx )//u8 height, int sIdx, float scale )\n" +"{\n" +" const float oneOver255 = 1.f/255.f;\n" +"\n" +" int faceIdx = fastDiv(sIdx,(HEIGHT_RES*HEIGHT_RES));\n" +" int r = (sIdx%(HEIGHT_RES*HEIGHT_RES));\n" +" int i = r/HEIGHT_RES;\n" +" int j = r%HEIGHT_RES;\n" +"\n" +" float4 v;\n" +" float x = fastDiv((i+0.5f),(float)HEIGHT_RES);\n" +" float y = fastDiv((j+0.5f),(float)HEIGHT_RES);\n" +" v = CubeMapUtilsCalcVector(faceIdx, x, y);\n" +" v = normalize3( v );\n" +"\n" +" int quantizedHeight = sample( shape, faceIdx, i, j );\n" +" float rheight = quantizedHeight*oneOver255*shape->m_scale;\n" +" return rheight*v;\n" +"}\n" +"\n" +"float ShapeDataQueryDistance(__local const ShapeDeviceData* shape, float4 p )\n" +"{\n" +" 
if( dot3F4( p, p ) >= shape->m_scale*shape->m_scale ) return FLT_MAX;\n" +"\n" +" const float oneOver255 = 1.f/255.f;\n" +"\n" +" int faceIdx;\n" +" float x, y;\n" +" CubeMapUtilsCalcCrd( p, &faceIdx, &x, &y );\n" +" x = (x*HEIGHT_RES) - 0.5f;\n" +" y = (y*HEIGHT_RES) - 0.5f;\n" +"\n" +" float height;\n" +" {\n" +" int xi = (int)(x);\n" +" int yi = (int)(y);\n" +" float dx = x-xi;\n" +" float dy = y-yi;\n" +"\n" +" {\n" +" int xip = min2((int)(HEIGHT_RES-1), xi+1);\n" +" int yip = min2((int)(HEIGHT_RES-1), yi+1);\n" +"\n" +" u32 xy = sample( shape, faceIdx, xi, yi );\n" +" u32 xpy = sample( shape, faceIdx, xip, yi );\n" +" u32 xpyp = sample( shape, faceIdx, xip, yip );\n" +" u32 xyp = sample( shape, faceIdx, xi, yip );\n" +"\n" +" height = (xy*(1.f-dx)+xpy*dx)*(1.f-dy) + (xyp*(1.f-dx)+xpyp*dx)*dy;\n" +" height = height*oneOver255*shape->m_scale;\n" +"\n" +" p.w = 0.f;\n" +"\n" +" height = fastLength4( p ) - height;\n" +" }\n" +" }\n" +"\n" +" return height;\n" +"}\n" +"\n" +"float ShapeDataQuerySupportHeight(__global ShapeData* shape, float4 p )\n" +"{\n" +" int faceIdx;\n" +" float x, y;\n" +" CubeMapUtilsCalcCrd( p, &faceIdx, &x, &y );\n" +" x = (x*HEIGHT_RES) - 0.5f;\n" +" y = (y*HEIGHT_RES) - 0.5f;\n" +"\n" +" float height;\n" +" {\n" +" int xi = (int)(x);\n" +" int yi = (int)(y);\n" +"\n" +" {\n" +" int xip = min2((int)(HEIGHT_RES-1), xi+1);\n" +" int yip = min2((int)(HEIGHT_RES-1), yi+1);\n" +"\n" +" u32 xy = sampleSupportGlobal( shape, faceIdx, xi, yi );\n" +" u32 xpy = sampleSupportGlobal( shape, faceIdx, xip, yi );\n" +" u32 xpyp = sampleSupportGlobal( shape, faceIdx, xip, yip );\n" +" u32 xyp = sampleSupportGlobal( shape, faceIdx, xi, yip );\n" +"\n" +" height = max2( xy, max2( xpy, max2( xpyp, xyp ) ) );\n" +" height = height/255.f*shape->m_scale;\n" +" }\n" +" }\n" +"\n" +" return height;\n" +"\n" +"}\n" +"\n" +"float4 ShapeDataQueryNormal(__global const ShapeData* shape, float4 p )\n" +"{\n" +" int faceIdx;\n" +" float x, y;\n" +" CubeMapUtilsCalcCrd( 
p, &faceIdx, &x, &y );\n" +" x = (x*HEIGHT_RES) - 0.5f;\n" +" y = (y*HEIGHT_RES) - 0.5f;\n" +"\n" +" float4 normalOut;\n" +" {\n" +" int xi = (int)(x);\n" +" int yi = (int)(y);\n" +"\n" +" normalOut = sampleNormalGlobal( shape, faceIdx, xi, yi );\n" +" }\n" +" return normalOut;\n" +"}\n" +"\n" +"\n" +"\n" +"// kernels\n" +"\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" +"void SupportCullingKernel( __global int2* restrict gPairsIn, __global ShapeData* gShapes, \n" +" __global BodyData* gBodies, \n" +" __global int2* gPairsOut, \n" +" counter32_t gNPairs,\n" +" ConstBuffer cb )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +" if( gIdx >= cb.m_nPairs ) return;\n" +"\n" +" const float collisionMargin = cb.m_collisionMargin;\n" +" const int capacity = cb.m_capacity;\n" +"\n" +" int2 pair = gPairsIn[gIdx];\n" +" BodyData bodyA = gBodies[pair.x];\n" +" BodyData bodyB = gBodies[pair.y];\n" +" int shapeAIdx = bodyA.m_shapeIdx;\n" +" int shapeBIdx = bodyB.m_shapeIdx;\n" +"\n" +"\n" +" bool collide = false;\n" +" \n" +" //only collide if one of the two bodies has a non-zero mass\n" +" if (bodyA.m_invMass==0.f && bodyB.m_invMass==0.f)\n" +" return;\n" +" \n" +" \n" +" if (bodyA.m_shapeType == SHAPE_CONVEX_HEIGHT_FIELD && bodyB.m_shapeType==SHAPE_CONVEX_HEIGHT_FIELD)\n" +" {\n" +" float4 abInA, baInB;\n" +" float4 ab = bodyB.m_pos - bodyA.m_pos;\n" +" {\n" +" abInA = qtInvRotate( bodyA.m_quat, ab );\n" +" baInB = qtInvRotate( bodyB.m_quat, -ab );\n" +" }\n" +" float hA = ShapeDataQuerySupportHeight( gShapes+shapeAIdx, abInA );\n" +" float hB = ShapeDataQuerySupportHeight( gShapes+shapeBIdx, baInB );\n" +"\n" +" float h2 = dot3F4( ab, ab );\n" +"\n" +" collide = ( hA + hB + collisionMargin > sqrtf(h2) );\n" +" }\n" +"\n" +" if( collide )\n" +" {\n" +" int dstIdx;\n" +" AppendInc( gNPairs, dstIdx );\n" +" if( dstIdx < capacity )\n" +" gPairsOut[dstIdx] = pair;\n" +" }\n" +"}\n" +"\n" +"\n" +"#define PARALLEL_DO(execution, n) for(int ie=0; ie 
h[lIdx+1].y)? h[lIdx]: h[lIdx+1];" +" mem_fence( CLK_LOCAL_MEM_FENCE );" +" h[lIdx] = (h[lIdx].y > h[lIdx+2].y)? h[lIdx]: h[lIdx+2];" +" mem_fence( CLK_LOCAL_MEM_FENCE );" +" h[lIdx] = (h[lIdx].y > h[lIdx+4].y)? h[lIdx]: h[lIdx+4];" +" mem_fence( CLK_LOCAL_MEM_FENCE );" +" h[lIdx] = (h[lIdx].y > h[lIdx+8].y)? h[lIdx]: h[lIdx+8];" +" mem_fence( CLK_LOCAL_MEM_FENCE );" +" h[lIdx] = (h[lIdx].y > h[lIdx+16].y)? h[lIdx]: h[lIdx+16];" +" }}\n" +"\n" +"#define PARALLEL_REDUCE32(h) " +" {int lIdx = GET_LOCAL_IDX;" +" if( lIdx < 32 )" +" {" +" h[lIdx] += h[lIdx+1];" +" mem_fence( CLK_LOCAL_MEM_FENCE );" +" h[lIdx] += h[lIdx+2];" +" mem_fence( CLK_LOCAL_MEM_FENCE );" +" h[lIdx] += h[lIdx+4];" +" mem_fence( CLK_LOCAL_MEM_FENCE );" +" h[lIdx] += h[lIdx+8];" +" mem_fence( CLK_LOCAL_MEM_FENCE );" +" h[lIdx] += h[lIdx+16];" +" }}\n" +"\n" +"\n" +"float4 extractManifold(__local float4* p, __local float4* h, __local int* nPointsPtr, float4 nearNormal)\n" +"{\n" +" int nPoints = *nPointsPtr;\n" +" float4 center = make_float4(0,0,0,0);\n" +" { // calculate center\n" +" nPoints = min2( nPoints, 32 );\n" +" {\n" +" int lIdx = GET_LOCAL_IDX;\n" +" h[lIdx] = p[lIdx];\n" +" h[lIdx] = (lIdx= nPoints ) a[ie] = make_int4(-0xfffffff, -0xfffffff, -0xfffffff, -0xfffffff);\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" { // vector reduce, h[64]\n" +" int lIdx = GET_LOCAL_IDX;\n" +" if( lIdx < 32 )\n" +" {\n" +" h[lIdx] = max2( h[lIdx], h[lIdx+1] );\n" +" mem_fence( CLK_LOCAL_MEM_FENCE );\n" +" h[lIdx] = max2( h[lIdx], h[lIdx+2] );\n" +" mem_fence( CLK_LOCAL_MEM_FENCE );\n" +" h[lIdx] = max2( h[lIdx], h[lIdx+4] );\n" +" mem_fence( CLK_LOCAL_MEM_FENCE );\n" +" h[lIdx] = max2( h[lIdx], h[lIdx+8] );\n" +" mem_fence( CLK_LOCAL_MEM_FENCE );\n" +" h[lIdx] = max2( h[lIdx], h[lIdx+16] );\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +" {\n" +" { // set to idx\n" +" idx[0] = (int)a[0].x & 0xff;\n" +" idx[1] = (int)a[0].y & 0xff;\n" +" idx[2] = (int)a[0].z & 0xff;\n" +" idx[3] = 
(int)a[0].w & 0xff;\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" float4 selection;\n" +" if( GET_LOCAL_IDX < 4 ) selection = p[idx[GET_LOCAL_IDX]];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" if( GET_LOCAL_IDX < 4 ) p[GET_LOCAL_IDX] = selection;\n" +" }\n" +"\n" +"\n" +" return center;\n" +"}\n" +"\n" +"void extractManifold1(__local float4* p, __local float4* h, __local int* nPointsPtr, float4 center)\n" +"{\n" +" __local int* a = (__local int*)h;\n" +" {\n" +" GROUP_LDS_BARRIER;\n" +" float4 selection;\n" +" if( GET_LOCAL_IDX < 4 )\n" +" {\n" +" int idx = (int)a[GET_LOCAL_IDX] & 0xff;\n" +" selection = p[idx];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" if( GET_LOCAL_IDX < 4 ) p[GET_LOCAL_IDX] = selection;\n" +" }\n" +"\n" +"}\n" +"\n" +"void extractManifold2( __local float4* p0, __local int* nPointsPtr0, float4 nearNormal0,\n" +" __local float4* p1, __local int* nPointsPtr1, float4 nearNormal1,\n" +" __local float4* h, float4 centerOut[2])\n" +"{\n" +"\n" +" int nPoints[2];\n" +" nPoints[0] = *nPointsPtr0;\n" +" nPoints[1] = *nPointsPtr1;\n" +" float4 center[2];\n" +" center[0] = make_float4(0,0,0,0);\n" +" center[1] = make_float4(0,0,0,0);\n" +" { // calculate center\n" +" nPoints[0] = min2( nPoints[0], 32 );\n" +" nPoints[1] = min2( nPoints[1], 32 );\n" +" {\n" +" int lIdx = GET_LOCAL_IDX;\n" +" h[lIdx] = (lIdx= nPoints[setIdx] ) a[ie + setIdx*64] = make_int4(-0xfffffff, -0xfffffff, -0xfffffff, -0xfffffff);\n" +"\n" +" a[ie + 32] = make_int4(-0xfffffff, -0xfffffff, -0xfffffff, -0xfffffff);\n" +" }\n" +" }\n" +" }\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" { // vector reduce, h[64]\n" +" int bIdx = GET_LOCAL_IDX/32;\n" +" int eIdx = GET_LOCAL_IDX%32;\n" +" int lIdx = eIdx + bIdx*64;\n" +" {\n" +" h[lIdx] = max2( h[lIdx], h[lIdx+1] );\n" +" mem_fence( CLK_LOCAL_MEM_FENCE );\n" +" h[lIdx] = max2( h[lIdx], h[lIdx+2] );\n" +" mem_fence( CLK_LOCAL_MEM_FENCE );\n" +" h[lIdx] = max2( h[lIdx], h[lIdx+4] );\n" +" mem_fence( CLK_LOCAL_MEM_FENCE );\n" +" h[lIdx] = max2( h[lIdx], 
h[lIdx+8] );\n" +" mem_fence( CLK_LOCAL_MEM_FENCE );\n" +" h[lIdx] = max2( h[lIdx], h[lIdx+16] );\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +" __local int* a = (__local int*)h;\n" +" {\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" float4 selection;\n" +"\n" +" int bIdx = GET_LOCAL_IDX/32;\n" +" int eIdx = GET_LOCAL_IDX%32;\n" +"\n" +" if( eIdx < 4 )\n" +" {\n" +" int idx = (int)a[eIdx+64*4*bIdx] & 0xff;\n" +" selection = p0[idx+32*bIdx];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" if( eIdx < 4 ) p0[eIdx+32*bIdx] = selection;\n" +" }\n" +"}\n" +"\n" +"/*\n" +"1. Query Normal\n" +"2. Fill Normal\n" +"3. A->B, B->A\n" +"*/\n" +"\n" +"void testVtx(__local BodyData* bodyAPtr, __local BodyData* bodyBPtr,\n" +" __local ShapeDeviceData* shapeAPtr, __local ShapeDeviceData* shapeBPtr,\n" +" __local int* lNContacts, __local float4* lCPoints)\n" +"{\n" +" int pIdx = GET_LOCAL_IDX;\n" +" float4 bodyAPos = bodyAPtr->m_pos;\n" +" float4 bodyBPos = bodyBPtr->m_pos;\n" +" Quaternion bodyAQuat = bodyAPtr->m_quat;\n" +" Quaternion bodyBQuat = bodyBPtr->m_quat;\n" +" while( pIdx < HEIGHT_RES*HEIGHT_RES*6 )\n" +" {\n" +" float4 pInB = ShapeDataCalcSamplePoint( shapeBPtr, pIdx );\n" +"\n" +" float4 pInW = transform( &pInB, &bodyBPos, &bodyBQuat );\n" +"// Aabb bodyAAabb = bodyAPtr->m_aabb;\n" +"// if( AabbOverlapsPoint( &bodyAAabb, pInW ) )\n" +" {\n" +" float4 pInA = invTransform( &pInW, &bodyAPos, &bodyAQuat );\n" +"\n" +" float dist = ShapeDataQueryDistance( shapeAPtr, pInA );\n" +" if( dist < 0.010f )\n" +" {\n" +" int dstIdx = atom_add( lNContacts, 1 );\n" +" if( dstIdx < 32 )\n" +" {\n" +" lCPoints[ dstIdx ] = make_float4( pInA.x, pInA.y, pInA.z, dist );\n" +" }\n" +" }\n" +" }\n" +"\n" +" pIdx += GET_GROUP_SIZE;\n" +" }\n" +"}\n" +"\n" +"void testVtx2(__local const BodyData* bodyA, __local const BodyData* bodyB,\n" +" __local const ShapeDeviceData* shapeA, __local const ShapeDeviceData* shapeB,\n" +" __local int* lNContactsA, __local float4* lCPointsA,\n" +" __local int* 
lNContactsB, __local float4* lCPointsB, float collisionMargin )\n" +"{\n" +" int pIdx = GET_LOCAL_IDX;\n" +"\n" +" while( pIdx < HEIGHT_RES*HEIGHT_RES*6*2 )\n" +" {\n" +" __local const BodyData* bodyAPtr =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?bodyA:bodyB;\n" +" __local const BodyData* bodyBPtr =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?bodyB:bodyA;\n" +" __local const ShapeDeviceData* shapeAPtr =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?shapeA:shapeB;\n" +" __local const ShapeDeviceData* shapeBPtr =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?shapeB:shapeA;\n" +" __local int* lNContacts =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?lNContactsA:lNContactsB;\n" +" __local float4* lCPoints =( pIdx < HEIGHT_RES*HEIGHT_RES*6 )?lCPointsA:lCPointsB;\n" +"\n" +" float4 bodyAPos = bodyAPtr->m_pos;\n" +" float4 bodyBPos = bodyBPtr->m_pos;\n" +" Quaternion bodyAQuat = bodyAPtr->m_quat;\n" +" Quaternion bodyBQuat = bodyBPtr->m_quat;\n" +"\n" +" float4 pInB = ShapeDataCalcSamplePoint( shapeBPtr, pIdx%(HEIGHT_RES*HEIGHT_RES*6) );\n" +"\n" +" float4 pInW = transform( &pInB, &bodyBPos, &bodyBQuat );\n" +"// Aabb bodyAAabb = bodyAPtr->m_aabb;\n" +"// if( AabbOverlapsPoint( &bodyAAabb, pInW ) )\n" +" {\n" +" float4 pInA = invTransform( &pInW, &bodyAPos, &bodyAQuat );\n" +"\n" +" float dist = ShapeDataQueryDistance( shapeAPtr, pInA );\n" +" if( dist < collisionMargin )\n" +" {\n" +" int dstIdx = atom_add( lNContacts, 1 );\n" +" if( dstIdx < 32 )\n" +" {\n" +" lCPoints[ dstIdx ] = make_float4( pInA.x, pInA.y, pInA.z, dist );\n" +" }\n" +" }\n" +" }\n" +"\n" +" pIdx += GET_GROUP_SIZE;\n" +" }\n" +"}\n" +"\n" +"void testVtxWithPlane(__local BodyData* bodyA, __local BodyData* bodyB,\n" +" float4 nA, __local ShapeDeviceData* shapeB,\n" +" __local int* lNContactsA, __local float4* lCPointsA, float collisionMargin)\n" +"{\n" +" int pIdx = GET_LOCAL_IDX;\n" +"\n" +" while( pIdx < HEIGHT_RES*HEIGHT_RES*6 )\n" +" {\n" +" __local BodyData* bodyAPtr =bodyA;\n" +" __local BodyData* bodyBPtr =bodyB;\n" +" __local ShapeDeviceData* 
shapeBPtr =shapeB;\n" +" __local int* lNContacts =lNContactsA;\n" +" __local float4* lCPoints =lCPointsA;\n" +"\n" +" float4 bodyAPos = bodyAPtr->m_pos;\n" +" float4 bodyBPos = bodyBPtr->m_pos;\n" +" Quaternion bodyAQuat = bodyAPtr->m_quat;\n" +" Quaternion bodyBQuat = bodyBPtr->m_quat;\n" +"\n" +" float4 pInB = ShapeDataCalcSamplePoint( shapeBPtr, pIdx%(HEIGHT_RES*HEIGHT_RES*6) );\n" +"\n" +" float4 pInW = transform( &pInB, &bodyBPos, &bodyBQuat );\n" +" {\n" +" float4 pInA = invTransform( &pInW, &bodyAPos, &bodyAQuat );\n" +"\n" +" float dist = dot3w1( pInA, nA );//ShapeDataQueryDistance( shapeAPtr, pInA );\n" +" if( dist < collisionMargin )\n" +" {\n" +" int dstIdx = atom_add( lNContacts, 1 );\n" +" if( dstIdx < 32 )\n" +" {\n" +" lCPoints[ dstIdx ] = make_float4( pInA.x, pInA.y, pInA.z, dist );\n" +" }\n" +" }\n" +" }\n" +"\n" +" pIdx += GET_GROUP_SIZE;\n" +" }\n" +"}\n" +"\n" +"#define GET_SHAPE_IDX(x) (int)((x).m_shapeIdx)\n" +"\n" +"void output(__local BodyData* bodyAPtr, __local BodyData* bodyBPtr,\n" +" __local int2* iPair,\n" +" __local int* lNContacts, __local float4* lCPoints,\n" +" float4 center, \n" +" __global ShapeData* shapeData, __global Contact4* contactsOut, float collisionMargin)\n" +"{\n" +" if( *lNContacts != 0 )\n" +" {\n" +" int nContacts = min2( *lNContacts, 4 );\n" +"\n" +" __global Contact4* c = contactsOut;\n" +"\n" +" if( GET_LOCAL_IDX < nContacts )\n" +" {\n" +" int i = GET_LOCAL_IDX;\n" +" float4 p = lCPoints[i];\n" +" float4 bodyAPos = bodyAPtr->m_pos;\n" +" Quaternion bodyAQuat = bodyAPtr->m_quat;\n" +"\n" +" c->m_worldPos[i] = transform( &p, &bodyAPos, &bodyAQuat );\n" +" c->m_worldPos[i].w = lCPoints[i].w - collisionMargin;\n" +" }\n" +"\n" +" if( GET_LOCAL_IDX == 0 )\n" +" {\n" +" float4 contactNormal;\n" +" contactNormal = ShapeDataQueryNormal( &shapeData[GET_SHAPE_IDX(*bodyAPtr)], center );\n" +" contactNormal = normalize3( qtRotate( bodyAPtr->m_quat, contactNormal ) );\n" +"\n" +" c->m_worldNormal = contactNormal;\n" +"// 
c->m_restituitionCoeff = 0.f;\n" +"// c->m_frictionCoeff = 0.7f;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" GET_NPOINTS(*c) = nContacts;\n" +" c->m_bodyAPtr = iPair[0].x;\n" +" c->m_bodyBPtr = iPair[0].y;\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if( GET_LOCAL_IDX == 0 )\n" +" GET_NPOINTS(contactsOut[0]) = 0;\n" +" }\n" +"}\n" +"\n" +"// todo. make it better\n" +"void output2(__local BodyData* bodyAPtr, __local BodyData* bodyBPtr,\n" +" int pair0, int pair1,\n" +" __local int* lNContacts, __local float4* lCPoints,\n" +" float4 center, \n" +" const __global ShapeData* shapeData, __global Contact4* contactsOut, counter32_t nContactsOut, int capacity,\n" +" float collisionMargin )\n" +"{\n" +" int lIdx = GET_LOCAL_IDX%32;\n" +" int nContacts = min2( *lNContacts, 4 );\n" +" \n" +" GROUP_LDS_BARRIER;\n" +"\n" +" if( lIdx == 0 && nContacts)\n" +" {\n" +" int dstIdx;\n" +" AppendInc( nContactsOut, dstIdx );\n" +" *lNContacts = dstIdx;\n" +"\n" +" if( dstIdx >= capacity )\n" +" *lNContacts = -1;\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" bool canWrite = (*lNContacts!=-1);\n" +"\n" +" if( nContacts && canWrite )\n" +" {\n" +" __global Contact4* c = contactsOut + (*lNContacts);\n" +"\n" +" if( lIdx < nContacts )\n" +" {\n" +" int i = lIdx;\n" +" float4 p = lCPoints[i];\n" +" float4 bodyAPos = bodyAPtr->m_pos;\n" +" Quaternion bodyAQuat = bodyAPtr->m_quat;\n" +"\n" +" p = transform( &p, &bodyAPos, &bodyAQuat );\n" +" p.w = lCPoints[i].w - collisionMargin;\n" +" c->m_worldPos[i] = p;\n" +" }\n" +"\n" +" if( lIdx == 0 )\n" +" {\n" +" if( nContacts )\n" +" {\n" +" float4 contactNormal;\n" +" contactNormal = ShapeDataQueryNormal( &shapeData[GET_SHAPE_IDX(*bodyAPtr)], center );\n" +" contactNormal = normalize3( qtRotate( bodyAPtr->m_quat, contactNormal ) );\n" +"\n" +" c->m_worldNormal = contactNormal;\n" +"// c->m_restituitionCoeff = 0.f;\n" +"// c->m_frictionCoeff = 0.7f;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" 
c->m_bodyAPtr = pair0;\n" +" c->m_bodyBPtr = pair1;\n" +" }\n" +" GET_NPOINTS(*c) = nContacts;\n" +" }\n" +" }\n" +"}\n" +"\n" +"__inline\n" +"void output2LDS(__local BodyData* bodyAPtr, __local BodyData* bodyBPtr,\n" +" int pair0, int pair1,\n" +" int lNContacts, __local float4* lCPoints,\n" +" float4 center, \n" +" const __global ShapeData* shapeData, __local Contact4* contactsOut,\n" +" float collisionMargin )\n" +"{\n" +" int lIdx = GET_LOCAL_IDX%32;\n" +"// int lIdx = GET_LOCAL_IDX;\n" +"// int groupIdx = 0;\n" +"\n" +" int nContacts = min2( lNContacts, 4 );\n" +" \n" +" GROUP_LDS_BARRIER;\n" +"\n" +" if( nContacts != 0 )\n" +" {\n" +" if( lIdx < nContacts )\n" +" {\n" +" int i = lIdx;\n" +" float4 p = lCPoints[i];\n" +" float4 bodyAPos = bodyAPtr->m_pos;\n" +" Quaternion bodyAQuat = bodyAPtr->m_quat;\n" +"\n" +" p = transform( &p, &bodyAPos, &bodyAQuat );\n" +" p.w = lCPoints[i].w - collisionMargin;\n" +" contactsOut->m_worldPos[i] = p;\n" +" }\n" +" }\n" +"\n" +" if( lIdx == 0 )\n" +" {\n" +" if( nContacts != 0 )\n" +" {\n" +" float4 contactNormal;\n" +" contactNormal = ShapeDataQueryNormal( &shapeData[GET_SHAPE_IDX(*bodyAPtr)], center );\n" +" contactNormal = normalize3( qtRotate( bodyAPtr->m_quat, contactNormal ) );\n" +"\n" +" contactsOut->m_worldNormal = contactNormal;\n" +"// contactsOut->m_worldNormal = make_float4(1.5f,1.4f,1.3f,0.f);\n" +"// contactsOut->m_restituitionCoeff = 0.f;\n" +"// contactsOut->m_frictionCoeff = 0.7f;\n" +" contactsOut->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" contactsOut->m_bodyAPtr = pair0;\n" +" contactsOut->m_bodyBPtr = pair1;\n" +" }\n" +" GET_NPOINTS(*contactsOut) = nContacts;//nContacts;\n" +" }\n" +"\n" +"// contactsOut[groupIdx].m_worldNormal = make_float4(1.5f,1.4f,1.3f,0.f);\n" +"}\n" +"\n" +"void output2_1(__local BodyData* bodyAPtr, __local BodyData* bodyBPtr,\n" +" int pair0, int pair1,\n" +" __local int* lNContacts, __local float4* lCPoints,\n" +" float4 center, float4 nA, \n" +" const 
__global ShapeData* shapeData, __global Contact4* contactsOut, counter32_t nContactsOut, int capacity, float collisionMargin )\n" +"{\n" +" int lIdx = GET_LOCAL_IDX;\n" +" int nContacts = min2( *lNContacts, 4 );\n" +" \n" +" GROUP_LDS_BARRIER;\n" +"\n" +" if( lIdx == 0 && nContacts)\n" +" {\n" +" int dstIdx;\n" +" AppendInc( nContactsOut, dstIdx );\n" +" *lNContacts = dstIdx;\n" +"\n" +" if( dstIdx >= capacity )\n" +" *lNContacts = -1;\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" bool canWrite = (*lNContacts!=-1);\n" +"\n" +" if( nContacts && canWrite )\n" +" {\n" +" __global Contact4* c = contactsOut + (*lNContacts);\n" +"\n" +" if( lIdx < nContacts )\n" +" {\n" +" int i = lIdx;\n" +" float4 p = lCPoints[i];\n" +" float4 bodyAPos = bodyAPtr->m_pos;\n" +" Quaternion bodyAQuat = bodyAPtr->m_quat;\n" +"\n" +" p = transform( &p, &bodyAPos, &bodyAQuat );\n" +" p.w = lCPoints[i].w - collisionMargin;\n" +" c->m_worldPos[i] = p;\n" +" }\n" +"\n" +" if( lIdx == 0 )\n" +" {\n" +" if( nContacts )\n" +" {\n" +" float4 contactNormal;\n" +" contactNormal = nA;//ShapeDataQueryNormal( &shapeData[GET_SHAPE_IDX(*bodyAPtr)], center );\n" +" contactNormal = normalize3( qtRotate( bodyAPtr->m_quat, contactNormal ) );\n" +"\n" +" c->m_worldNormal = contactNormal;\n" +"// c->m_restituitionCoeff = 0.f;\n" +"// c->m_frictionCoeff = 0.7f;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_bodyAPtr = pair0;\n" +" c->m_bodyBPtr = pair1;\n" +" }\n" +" GET_NPOINTS(*c) = nContacts;\n" +" }\n" +" }\n" +"}\n" +"\n" +"__kernel\n" +"void manifold(__global float4* vIn, __global float4* vOut)\n" +"{\n" +" __local float4 lCPoints[32];\n" +" __local float4 lManifoldBuffer[64];\n" +" __local int lNContacts;\n" +" __local float4 ab;\n" +"\n" +" if( GET_LOCAL_IDX<32 )\n" +" {\n" +" lCPoints[GET_LOCAL_IDX] = vIn[GET_GLOBAL_IDX];\n" +" }\n" +"\n" +" if( GET_LOCAL_IDX == 0 ) \n" +" {\n" +" lNContacts = 32;\n" +" ab = vIn[GET_GLOBAL_IDX];\n" +" }\n" +"\n" +" 
GROUP_LDS_BARRIER;\n" +"\n" +" float4 center = extractManifold( lCPoints, lManifoldBuffer, &lNContacts, ab );\n" +"\n" +" if( GET_LOCAL_IDX < lNContacts )\n" +" {\n" +" vOut[4*GET_GROUP_IDX+GET_LOCAL_IDX] = lCPoints[GET_LOCAL_IDX];\n" +" }\n" +"\n" +"}\n" +"\n" +"//#define COMBINE_REDUCTION \n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(64, 1, 1)))\n" +"void NarrowphaseKernel( const __global int2* restrict pairs, const __global ShapeData* shapeData, const __global BodyData* restrict bodyDatas, \n" +" __global Contact4* restrict contactsOut,\n" +" counter32_t nContactsOut, ConstBuffer cb ) \n" +"{\n" +" // 2.5K LDS\n" +" __local Contact4 ldsContacts[2];\n" +" __local BodyData bodyA;\n" +" __local BodyData bodyB;\n" +" __local ShapeDeviceData shapeA;\n" +" __local ShapeDeviceData shapeB;\n" +" __local float4 lCPointsA[32*2];\n" +" __local int lNContactsA;\n" +" __local float4* lCPointsB = lCPointsA+32;\n" +" __local int lNContactsB;\n" +"#ifdef COMBINE_REDUCTION\n" +" __local float4 lManifoldBuffer[64*2];\n" +"#else\n" +" __local float4 lManifoldBuffer[64];\n" +"#endif\n" +" __local int2 iPairAB;\n" +"\n" +" const int capacity = cb.m_capacity;\n" +" const float collisionMargin = cb.m_collisionMargin;\n" +"\n" +"\n" +" int pairIdx = GET_GROUP_IDX;\n" +"// for(int pairIdx = GET_GROUP_IDX; pairIdxm_height4[idx] = shapeData[ myShapeIdx ].m_height4[idx];\n" +"\n" +" idx+=32;\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" testVtx2( &bodyA, &bodyB, &shapeA, &shapeB, &lNContactsA, lCPointsA, &lNContactsB, lCPointsB, collisionMargin );\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" float4 ab = bodyB.m_pos - bodyA.m_pos;\n" +" float4 center[2];\n" +"\n" +" if( lNContactsA != 0 || lNContactsB != 0 )\n" +" {\n" +" float4 abInA;\n" +" abInA = qtInvRotate( bodyA.m_quat, ab );\n" +"\n" +" float4 abInB;\n" +" abInB = qtInvRotate( bodyB.m_quat, ab );\n" +"\n" +"#ifdef COMBINE_REDUCTION\n" +" extractManifold2( lCPointsA, &lNContactsA, abInA,\n" +" lCPointsB, 
&lNContactsB, abInB,\n" +" lManifoldBuffer, center );\n" +"#else\n" +" if( lNContactsA != 0 )\n" +" center[0] = extractManifold( lCPointsA, lManifoldBuffer, &lNContactsA, abInA );\n" +" if( lNContactsB != 0 )\n" +" center[1] = extractManifold( lCPointsB, lManifoldBuffer, &lNContactsB, abInB );\n" +"#endif\n" +" }\n" +"\n" +" int firstSet = GET_LOCAL_IDX/32;\n" +"\n" +"/*\n" +" if( GET_LOCAL_IDX == 0 ) // for debug\n" +" {\n" +" ldsContacts[0].m_worldNormal = make_float4(-1,-1,-1,0);\n" +" ldsContacts[0].m_bodyAPtr = 0;\n" +" ldsContacts[0].m_bodyBPtr = 0;\n" +" ldsContacts[0].m_batchIdx = 111;\n" +" ldsContacts[1].m_worldNormal = make_float4(-1,-1,-1,0);\n" +" ldsContacts[1].m_bodyAPtr = 0;\n" +" ldsContacts[1].m_bodyBPtr = 0;\n" +" ldsContacts[1].m_batchIdx = 111;\n" +" }\n" +"*/\n" +" bool doReduction = true;\n" +" if( doReduction )\n" +" {\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" output2LDS( (firstSet)?&bodyA: &bodyB, (firstSet)?&bodyB : &bodyA, \n" +" (firstSet)?iPairAB.x : iPairAB.y, (firstSet)?iPairAB.y : iPairAB.x, \n" +" (firstSet)?lNContactsA : lNContactsB, (firstSet)?lCPointsA:lCPointsB, \n" +" (firstSet)?center[0] : center[1], shapeData, (firstSet)?&ldsContacts[0]: &ldsContacts[1], collisionMargin );\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" if( GET_LOCAL_IDX == 0 )\n" +" {\n" +" if( lNContactsA && lNContactsB )\n" +" {\n" +" float nDotn = dot3F4( ldsContacts[0].m_worldNormal, ldsContacts[1].m_worldNormal );\n" +" if( nDotn < -(1.f-0.01f) )\n" +" {\n" +" if( ldsContacts[0].m_bodyAPtr > ldsContacts[1].m_bodyAPtr )\n" +" lNContactsA = 0;\n" +" else\n" +" lNContactsB = 0;\n" +" }\n" +" }\n" +" }\n" +" \n" +" if( GET_LOCAL_IDX == 0 )\n" +" {\n" +" int n = lNContactsA;\n" +" if( n != 0 )\n" +" {\n" +" int dstIdx;\n" +" AppendInc( nContactsOut, dstIdx );\n" +" if( dstIdx < capacity )\n" +" { int idx = 0;\n" +" contactsOut[ dstIdx ] = ldsContacts[idx];\n" +" contactsOut[ dstIdx].m_batchIdx = pairIdx;\n" +" }\n" +" }\n" +"\n" +" n = lNContactsB;\n" +" if( n != 0 
)\n" +" {\n" +" int dstIdx;\n" +" AppendInc( nContactsOut, dstIdx );\n" +" if( dstIdx < capacity )\n" +" { int idx = 1;\n" +" contactsOut[ dstIdx ] = ldsContacts[idx];\n" +" contactsOut[ dstIdx].m_batchIdx = pairIdx;\n" +" }\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +" else\n" +" {\n" +" //output2( (firstSet)?&bodyA: &bodyB, (firstSet)?&bodyB : &bodyA, \n" +" // (firstSet)?iPairAB.x : iPairAB.y, (firstSet)?iPairAB.y : iPairAB.x, \n" +" // (firstSet)?&lNContactsA : &lNContactsB, (firstSet)?lCPointsA:lCPointsB, \n" +" // (firstSet)?center[0] : center[1], shapeData, contactsOut, nContactsOut, capacity, collisionMargin );\n" +" }\n" +" }\n" +"}\n" +"\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(64, 1, 1)))\n" +"void NarrowphaseWithPlaneKernel( const __global int2* restrict pairs, const __global ShapeData* shapeData, const __global BodyData* restrict bodyDatas, \n" +" __global Contact4* restrict contactsOut,\n" +" counter32_t nContactsOut, ConstBuffer cb ) \n" +"{\n" +" // 2.5K LDS\n" +" __local BodyData bodyA;\n" +" __local BodyData bodyB;\n" +" __local ShapeDeviceData shapeA;\n" +" __local ShapeDeviceData shapeB;\n" +" __local float4 lCPointsA[32*2];\n" +" __local int lNContactsA;\n" +"// __local float4* lCPointsB = lCPointsA+32;\n" +"// __local int lNContactsB;\n" +" __local float4 lManifoldBuffer[64];\n" +" __local int2 iPairAB;\n" +"\n" +" const int capacity = cb.m_capacity;\n" +" const float collisionMargin = cb.m_collisionMargin;\n" +"\n" +" int pairIdx = GET_GROUP_IDX;\n" +" {\n" +" if( GET_LOCAL_IDX == 0 ) // load Bodies\n" +" {\n" +" int2 pair = pairs[pairIdx];\n" +" iPairAB = make_int2(pair.x, pair.y);\n" +" bodyA = bodyDatas[ pair.x ];\n" +" bodyB = bodyDatas[ pair.y ];\n" +" shapeA.m_scale = shapeData[ GET_SHAPE_IDX(bodyA) ].m_scale;\n" +" shapeB.m_scale = shapeData[ GET_SHAPE_IDX(bodyB) ].m_scale;\n" +" lNContactsA = 0;\n" +"// lNContactsB = 0;\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" if (bodyB.m_invMass == 0.f)\n" 
+" return;\n" +" \n" +" // todo. can check if the shape is the same to previous one. If same, dont read\n" +" { // load shape data\n" +" int idx = GET_LOCAL_IDX%32;\n" +" int bIdx = GET_LOCAL_IDX/32;\n" +" __local ShapeDeviceData* myShape = (bIdx==0)?&shapeA: &shapeB;\n" +" int myShapeIdx = (bIdx==0)?GET_SHAPE_IDX(bodyA): GET_SHAPE_IDX(bodyB);\n" +"\n" +" while( idx < HEIGHT_RES*HEIGHT_RES*6/4 )\n" +" {\n" +" myShape->m_height4[idx] = shapeData[ myShapeIdx ].m_height4[idx];\n" +"\n" +" idx+=32;\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" float4 nA = make_float4(0,1,0,0);\n" +"\n" +"\n" +"// testVtx2( &bodyA, &bodyB, &shapeA, &shapeB, &lNContactsA, lCPointsA, &lNContactsB, lCPointsB );\n" +" testVtxWithPlane( &bodyA, &bodyB, nA, &shapeB, &lNContactsA, lCPointsA, collisionMargin );\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +"// float4 ab = bodyB.m_pos - bodyA.m_pos;\n" +" float4 center[2];\n" +"\n" +" if( lNContactsA != 0 )\n" +" {\n" +" float4 abInA;\n" +" abInA = nA;//qtInvRotate( bodyA.m_quat, ab );\n" +"\n" +" if( lNContactsA != 0 )\n" +" center[0] = extractManifold( lCPointsA, lManifoldBuffer, &lNContactsA, abInA );\n" +" }\n" +"\n" +"// int firstSet = GET_LOCAL_IDX/32;\n" +"\n" +" output2_1( &bodyA, &bodyB, \n" +" iPairAB.x, iPairAB.y, \n" +" &lNContactsA, lCPointsA, \n" +" center[0], nA, shapeData, contactsOut, nContactsOut, capacity, collisionMargin );\n" +" }\n" +"}\n" +; diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.h new file mode 100644 index 000000000..2a2382ac1 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.h @@ -0,0 +1,203 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#pragma once +#ifndef __ADL_SOLVER_H +#define __ADL_SOLVER_H + + +#include +#include +#include +#include +#include +#include + +//#include +#include "AdlRigidBody.h" +#include "AdlContact4.h" + +//#include "AdlPhysics/Batching/Batching.h> + + +#define MYF4 float4 +#define MAKE_MYF4 make_float4 + +//#define MYF4 float4sse +//#define MAKE_MYF4 make_float4sse + +#include "AdlConstraint4.h" + +namespace adl +{ +class SolverBase +{ + public: + + + struct ConstraintData + { + ConstraintData(): m_b(0.f), m_appliedRambdaDt(0.f) {} + + float4 m_linear; // have to be normalized + float4 m_angular0; + float4 m_angular1; + float m_jacCoeffInv; + float m_b; + float m_appliedRambdaDt; + + u32 m_bodyAPtr; + u32 m_bodyBPtr; + + bool isInvalid() const { return ((u32)m_bodyAPtr+(u32)m_bodyBPtr) == 0; } + float getFrictionCoeff() const { return m_linear.w; } + void setFrictionCoeff(float coeff) { m_linear.w = coeff; } + }; + + struct ConstraintCfg + { + ConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(-1) {} + + float m_positionDrift; + float m_positionConstraintCoeff; + float m_dt; + bool m_enableParallelSolve; + float m_averageExtent; + int m_staticIdx; + }; + + static + __inline + Buffer* allocateContact4( const Device* device, int capacity 
) + { + return new Buffer( device, capacity ); + } + + static + __inline + void deallocateContact4( Buffer* data ) { delete data; } + + static + __inline + SolverData allocateConstraint4( const Device* device, int capacity ) + { + return new Buffer( device, capacity ); + } + + static + __inline + void deallocateConstraint4( SolverData data ) { delete (Buffer*)data; } + + static + __inline + void* allocateFrictionConstraint( const Device* device, int capacity, u32 type = 0 ) + { + return 0; + } + + static + __inline + void deallocateFrictionConstraint( void* data ) + { + } + + enum + { + N_SPLIT = 16, + N_BATCHES = 4, + N_OBJ_PER_SPLIT = 10, + N_TASKS_PER_BATCH = N_SPLIT*N_SPLIT, + }; +}; + +template +class Solver : public SolverBase +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + struct Data + { + Data() : m_nIterations(4){} + + const Device* m_device; + void* m_parallelSolveData; + int m_nIterations; + Kernel* m_batchingKernel; + Kernel* m_batchSolveKernel; + Kernel* m_contactToConstraintKernel; + Kernel* m_setSortDataKernel; + Kernel* m_reorderContactKernel; + Kernel* m_copyConstraintKernel; + //typename RadixSort::Data* m_sort; + typename RadixSort32::Data* m_sort32; + typename BoundSearch::Data* m_search; + typename PrefixScan::Data* m_scan; + Buffer* m_sortDataBuffer; + Buffer* m_contactBuffer; + }; + + enum + { + DYNAMIC_CONTACT_ALLOCATION_THRESHOLD = 2000000, + }; + + static + Data* allocate( const Device* device, int pairCapacity ); + + static + void deallocate( Data* data ); + + static + void reorderConvertToConstraints( Data* data, const Buffer* bodyBuf, + const Buffer* shapeBuf, + Buffer* contactsIn, SolverData contactCOut, void* additionalData, + int nContacts, const ConstraintCfg& cfg ); + + static + void solveContactConstraint( Data* data, const Buffer* bodyBuf, const Buffer* inertiaBuf, + SolverData constraint, void* additionalData, int n ); + +// static +// int createSolveTasks( int batchIdx, Data* data, const Buffer* bodyBuf, const 
Buffer* shapeBuf, +// SolverData constraint, int n, ThreadPool::Task* tasksOut[], int taskCapacity ); + + + //private: + static + void convertToConstraints( Data* data, const Buffer* bodyBuf, + const Buffer* shapeBuf, + Buffer* contactsIn, SolverData contactCOut, void* additionalData, + int nContacts, const ConstraintCfg& cfg ); + + static + void sortContacts( Data* data, const Buffer* bodyBuf, + Buffer* contactsIn, void* additionalData, + int nContacts, const ConstraintCfg& cfg ); + + static + void batchContacts( Data* data, Buffer* contacts, int nContacts, Buffer* n, Buffer* offsets, int staticIdx ); + +}; + +#include "Solver.inl" +#include "SolverHost.inl" +}; + +#undef MYF4 +#undef MAKE_MYF4 + +#endif //__ADL_SOLVER_H diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.inl b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.inl new file mode 100644 index 000000000..3fc5a2f9f --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.inl @@ -0,0 +1,762 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\SolverKernels" +#define BATCHING_PATH "..\\..\\dynamics\\basic_demo\\Stubs\\batchingKernels" + +#define KERNEL1 "SingleBatchSolveKernel" +#define KERNEL2 "BatchSolveKernel" + +#define KERNEL3 "ContactToConstraintKernel" +#define KERNEL4 "SetSortDataKernel" +#define KERNEL5 "ReorderContactKernel" +#include "SolverKernels.h" + +#include "batchingKernels.h" + + +struct SolverDebugInfo +{ + int m_valInt0; + int m_valInt1; + int m_valInt2; + int m_valInt3; + + int m_valInt4; + int m_valInt5; + int m_valInt6; + int m_valInt7; + + int m_valInt8; + int m_valInt9; + int m_valInt10; + int m_valInt11; + + int m_valInt12; + int m_valInt13; + int m_valInt14; + int m_valInt15; + + + float m_val0; + float m_val1; + float m_val2; + float m_val3; +}; + + + + +class SolverDeviceInl +{ +public: + struct ParallelSolveData + { + Buffer* m_numConstraints; + Buffer* m_offsets; + }; +}; + +template +typename Solver::Data* Solver::allocate( const Device* device, int pairCapacity ) +{ + const char* src[] = +#if defined(ADL_LOAD_KERNEL_FROM_STRING) + {solverKernelsCL, 0}; +#else + {0,0}; +#endif + + const char* src2[] = +#if defined(ADL_LOAD_KERNEL_FROM_STRING) + {batchingKernelsCL, 0}; +#else + {0,0}; +#endif + + + + + Data* data = new Data; + data->m_device = device; + bool cacheBatchingKernel = true; + data->m_batchingKernel = device->getKernel( BATCHING_PATH, "CreateBatches", "-I ..\\..\\ ", src2[TYPE],cacheBatchingKernel); + //data->m_batchingKernel = device->getKernel( BATCHING_PATH, "CreateBatches", "-I ..\\..\\ ", 0,cacheBatchingKernel); + bool cacheSolverKernel = true; + + data->m_batchSolveKernel = device->getKernel( PATH, KERNEL2, "-I ..\\..\\ ", src[TYPE],cacheSolverKernel ); + data->m_contactToConstraintKernel = device->getKernel( PATH, KERNEL3, + "-I ..\\..\\ ", src[TYPE] ); + data->m_setSortDataKernel = device->getKernel( PATH, KERNEL4, + "-I ..\\..\\ ", src[TYPE] ); + 
data->m_reorderContactKernel = device->getKernel( PATH, KERNEL5, + "-I ..\\..\\ ", src[TYPE] ); + + data->m_copyConstraintKernel = device->getKernel( PATH, "CopyConstraintKernel", + "-I ..\\..\\ ", src[TYPE] ); + + data->m_parallelSolveData = new SolverDeviceInl::ParallelSolveData; + { + SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData; + solveData->m_numConstraints = new Buffer( device, N_SPLIT*N_SPLIT ); + solveData->m_offsets = new Buffer( device, N_SPLIT*N_SPLIT ); + } + const int sortSize = NEXTMULTIPLEOF( pairCapacity, 512 ); + + + //data->m_sort = RadixSort::allocate( data->m_device, sortSize );//todo. remove hardcode this + data->m_sort32 = RadixSort32::allocate( data->m_device, sortSize );//todo. remove hardcode this + + data->m_search = BoundSearch::allocate( data->m_device, N_SPLIT*N_SPLIT ); + data->m_scan = PrefixScan::allocate( data->m_device, N_SPLIT*N_SPLIT ); + + data->m_sortDataBuffer = new Buffer( data->m_device, sortSize ); + + if( pairCapacity < DYNAMIC_CONTACT_ALLOCATION_THRESHOLD ) + data->m_contactBuffer = new Buffer( data->m_device, pairCapacity ); + else + data->m_contactBuffer = 0; + + return data; +} + +template +void Solver::deallocate( Data* data ) +{ + { + SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData; + delete solveData->m_numConstraints; + delete solveData->m_offsets; + delete solveData; + } + +// RadixSort::deallocate( data->m_sort ); + RadixSort32::deallocate(data->m_sort32); + BoundSearch::deallocate( data->m_search ); + PrefixScan::deallocate( data->m_scan ); + + delete data->m_sortDataBuffer; + if( data->m_contactBuffer ) delete data->m_contactBuffer; + + delete data; +} + +template +void Solver::reorderConvertToConstraints( typename Solver::Data* data, const Buffer* bodyBuf, + const Buffer* shapeBuf, + Buffer* contactsIn, SolverData contactCOut, void* additionalData, + int nContacts, const typename 
Solver::ConstraintCfg& cfg ) +{ + if( data->m_contactBuffer ) + { + if( data->m_contactBuffer->getSize() < nContacts ) + { + BT_PROFILE("delete data->m_contactBuffer;"); + delete data->m_contactBuffer; + data->m_contactBuffer = 0; + } + } + if( data->m_contactBuffer == 0 ) + { + BT_PROFILE("new data->m_contactBuffer;"); + + data->m_contactBuffer = new Buffer( data->m_device, nContacts ); + } + Stopwatch sw; + + Buffer* contactNative = BufferUtils::map( data->m_device, contactsIn, nContacts ); + + //DeviceUtils::Config dhCfg; + //Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg ); + if( cfg.m_enableParallelSolve ) + { + SolverDeviceInl::ParallelSolveData* nativeSolveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData; + + DeviceUtils::waitForCompletion( data->m_device ); + sw.start(); + // contactsIn -> data->m_contactBuffer + { + BT_PROFILE("sortContacts"); + Solver::sortContacts( data, bodyBuf, contactNative, additionalData, nContacts, cfg ); + DeviceUtils::waitForCompletion( data->m_device ); + } + sw.split(); + if(0) + { + Contact4* tmp = new Contact4[nContacts]; + data->m_contactBuffer->read( tmp, nContacts ); + DeviceUtils::waitForCompletion( data->m_contactBuffer->m_device ); + contactNative->write( tmp, nContacts ); + DeviceUtils::waitForCompletion( contactNative->m_device ); + delete [] tmp; + } + else + { + BT_PROFILE("m_copyConstraintKernel"); + + Buffer constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST ); + + int4 cdata; cdata.x = nContacts; + BufferInfo bInfo[] = { BufferInfo( data->m_contactBuffer ), BufferInfo( contactNative ) }; +// Launcher launcher( data->m_device, data->m_device->getKernel( PATH, "CopyConstraintKernel", "-I ..\\..\\ -Wf,--c++", 0 ) ); + Launcher launcher( data->m_device, data->m_copyConstraintKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( nContacts, 64 ); + DeviceUtils::waitForCompletion( 
data->m_device ); + } + { + BT_PROFILE("batchContacts"); + Solver::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, cfg.m_staticIdx ); + + } + } + { + BT_PROFILE("waitForCompletion (batchContacts)"); + DeviceUtils::waitForCompletion( data->m_device ); + } + sw.split(); + //================ + if(0) + { +// Solver::Data* solverHost = Solver::allocate( deviceHost, nContacts ); +// Solver::convertToConstraints( solverHost, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, cfg ); +// Solver::deallocate( solverHost ); + } + else + { + BT_PROFILE("convertToConstraints"); + Solver::convertToConstraints( data, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, cfg ); + } + { + BT_PROFILE("convertToConstraints waitForCompletion"); + DeviceUtils::waitForCompletion( data->m_device ); + } + sw.stop(); + + { + BT_PROFILE("printf"); + + float t[5]; + sw.getMs( t, 3 ); +// printf("%3.2f, %3.2f, %3.2f, ", t[0], t[1], t[2]); + } + + { + BT_PROFILE("deallocate and unmap"); + + //DeviceUtils::deallocate( deviceHost ); + + BufferUtils::unmap( contactNative, contactsIn, nContacts ); + } +} + + +template +void Solver::solveContactConstraint( typename Solver::Data* data, const Buffer* bodyBuf, const Buffer* shapeBuf, + SolverData constraint, void* additionalData, int n ) +{ + if(0) + { + DeviceUtils::Config dhCfg; + Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg ); + { + Solver::Data* hostData = Solver::allocate( deviceHost, 0 ); + Solver::solveContactConstraint( hostData, bodyBuf, shapeBuf, constraint, additionalData, n ); + Solver::deallocate( hostData ); + } + DeviceUtils::deallocate( deviceHost ); + return; + } + + ADLASSERT( data ); + + Buffer* cBuffer =0; + + Buffer* gBodyNative=0; + Buffer* gShapeNative =0; + Buffer* gConstraintNative =0; + + + { + BT_PROFILE("map"); + cBuffer = (Buffer*)constraint; + + gBodyNative= BufferUtils::map( data->m_device, bodyBuf ); + 
gShapeNative= BufferUtils::map( data->m_device, shapeBuf ); + gConstraintNative = BufferUtils::map( data->m_device, cBuffer ); + DeviceUtils::waitForCompletion( data->m_device ); + } + + Buffer constBuffer; + int4 cdata = make_int4( n, 0, 0, 0 ); + { + SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData; + const int nn = N_SPLIT*N_SPLIT; + + cdata.x = 0; + cdata.y = 250; + +#if 0 +//check how the cells are filled + unsigned int* hostCounts = new unsigned int[N_SPLIT*N_SPLIT]; + solveData->m_numConstraints->read(hostCounts,N_SPLIT*N_SPLIT); + DeviceUtils::waitForCompletion( data->m_device ); + for (int i=0;i gpuDebugInfo(data->m_device,numWorkItems); +#endif + + + + { + + BT_PROFILE("m_batchSolveKernel iterations"); + for(int iter=0; iterm_nIterations; iter++) + { + for(int ib=0; ibm_numConstraints ), + BufferInfo( solveData->m_offsets ) +#ifdef DEBUG_ME + , BufferInfo(&gpuDebugInfo) +#endif + }; + + Launcher launcher( data->m_device, data->m_batchSolveKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + + launcher.launch1D( numWorkItems, 64 ); + +#ifdef DEBUG_ME + DeviceUtils::waitForCompletion( data->m_device ); + gpuDebugInfo.read(debugInfo,numWorkItems); + DeviceUtils::waitForCompletion( data->m_device ); + for (int i=0;i0) + { + printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2); + } + + if (debugInfo[i].m_valInt3>0) + { + printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3); + } + } +#endif //DEBUG_ME + + + } + } + + DeviceUtils::waitForCompletion( data->m_device ); + + + } + + cdata.x = 1; + { + BT_PROFILE("m_batchSolveKernel iterations2"); + for(int iter=0; iterm_nIterations; iter++) + { + for(int ib=0; ibm_numConstraints ), + BufferInfo( solveData->m_offsets ) +#ifdef DEBUG_ME + ,BufferInfo(&gpuDebugInfo) +#endif //DEBUG_ME + }; + Launcher launcher( data->m_device, data->m_batchSolveKernel ); + 
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( 64*nn/N_BATCHES, 64 ); + } + } + DeviceUtils::waitForCompletion( data->m_device ); + + } +#ifdef DEBUG_ME + delete[] debugInfo; +#endif //DEBUG_ME + } + + { + BT_PROFILE("unmap"); + BufferUtils::unmap( gBodyNative, bodyBuf ); + BufferUtils::unmap( gShapeNative, shapeBuf ); + BufferUtils::unmap( gConstraintNative, cBuffer ); + DeviceUtils::waitForCompletion( data->m_device ); + } +} + +template +void Solver::convertToConstraints( typename Solver::Data* data, const Buffer* bodyBuf, + const Buffer* shapeBuf, + Buffer* contactsIn, SolverData contactCOut, void* additionalData, + int nContacts, const ConstraintCfg& cfg ) +{ + ADLASSERT( data->m_device->m_type == TYPE_CL ); + + Buffer* bodyNative =0; + Buffer* shapeNative =0; + Buffer* contactNative =0; + Buffer* constraintNative =0; + + { + BT_PROFILE("map buffers"); + + bodyNative = BufferUtils::map( data->m_device, bodyBuf ); + shapeNative = BufferUtils::map( data->m_device, shapeBuf ); + contactNative= BufferUtils::map( data->m_device, contactsIn ); + constraintNative = BufferUtils::map( data->m_device, (Buffer*)contactCOut ); + } + struct CB + { + int m_nContacts; + float m_dt; + float m_positionDrift; + float m_positionConstraintCoeff; + }; + + { + BT_PROFILE("m_contactToConstraintKernel"); + CB cdata; + cdata.m_nContacts = nContacts; + cdata.m_dt = cfg.m_dt; + cdata.m_positionDrift = cfg.m_positionDrift; + cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff; + + Buffer constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST ); + BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( bodyNative ), BufferInfo( shapeNative ), + BufferInfo( constraintNative )}; + Launcher launcher( data->m_device, data->m_contactToConstraintKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + 
launcher.launch1D( nContacts, 64 ); + DeviceUtils::waitForCompletion( data->m_device ); + + } + + { + BT_PROFILE("unmap"); + BufferUtils::unmap( bodyNative, bodyBuf ); + BufferUtils::unmap( shapeNative, shapeBuf ); + BufferUtils::unmap( contactNative, contactsIn ); + BufferUtils::unmap( constraintNative, (Buffer*)contactCOut ); + } +} + +template +void Solver::sortContacts( typename Solver::Data* data, const Buffer* bodyBuf, + Buffer* contactsIn, void* additionalData, + int nContacts, const typename Solver::ConstraintCfg& cfg ) +{ + ADLASSERT( data->m_device->m_type == TYPE_CL ); + Buffer* bodyNative + = BufferUtils::map( data->m_device, bodyBuf ); + Buffer* contactNative + = BufferUtils::map( data->m_device, contactsIn ); + + const int sortAlignment = 512; // todo. get this out of sort + if( cfg.m_enableParallelSolve ) + { + SolverDeviceInl::ParallelSolveData* nativeSolveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData; + + int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment ); + + Buffer* countsNative = nativeSolveData->m_numConstraints;//BufferUtils::map( data->m_device, &countsHost ); + Buffer* offsetsNative = nativeSolveData->m_offsets;//BufferUtils::map( data->m_device, &offsetsHost ); + + { // 2. set cell idx + struct CB + { + int m_nContacts; + int m_staticIdx; + float m_scale; + int m_nSplit; + }; + + ADLASSERT( sortSize%64 == 0 ); + CB cdata; + cdata.m_nContacts = nContacts; + cdata.m_staticIdx = cfg.m_staticIdx; + cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent); + cdata.m_nSplit = N_SPLIT; + + Buffer constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST ); + BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( bodyNative ), BufferInfo( data->m_sortDataBuffer ) }; + Launcher launcher( data->m_device, data->m_setSortDataKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( sortSize, 64 ); + } + + { // 3. 
sort by cell idx + int n = N_SPLIT*N_SPLIT; + int sortBit = 32; + //if( n <= 0xffff ) sortBit = 16; + //if( n <= 0xff ) sortBit = 8; + RadixSort32::execute( data->m_sort32, *data->m_sortDataBuffer,sortSize); + } + { // 4. find entries + BoundSearch::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, N_SPLIT*N_SPLIT, BoundSearchBase::COUNT ); + + PrefixScan::execute( data->m_scan, *countsNative, *offsetsNative, N_SPLIT*N_SPLIT ); + } + + { // 5. sort constraints by cellIdx + // todo. preallocate this +// ADLASSERT( contactsIn->getType() == TYPE_HOST ); +// Buffer* out = BufferUtils::map( data->m_device, contactsIn ); // copying contacts to this buffer + + { + Buffer constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST ); + + int4 cdata; cdata.x = nContacts; + BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( data->m_contactBuffer ), BufferInfo( data->m_sortDataBuffer ) }; + Launcher launcher( data->m_device, data->m_reorderContactKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( nContacts, 64 ); + } +// BufferUtils::unmap( out, contactsIn, nContacts ); + } + } + + BufferUtils::unmap( bodyNative, bodyBuf ); + BufferUtils::unmap( contactNative, contactsIn ); +} + +template +void Solver::batchContacts( typename Solver::Data* data, Buffer* contacts, int nContacts, Buffer* n, Buffer* offsets, int staticIdx ) +{ + ADLASSERT( data->m_device->m_type == TYPE_CL ); + + if(0) + { + BT_PROFILE("CPU classTestKernel/Kernel (batch generation?)"); + + DeviceUtils::Config dhCfg; + Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg ); + { + Solver::Data* hostData = Solver::allocate( deviceHost, 0 ); + Solver::batchContacts( hostData, contacts, nContacts, n, offsets, staticIdx ); + Solver::deallocate( hostData ); + } + DeviceUtils::deallocate( deviceHost ); + return; + } + + Buffer* contactNative + = BufferUtils::map( data->m_device, 
contacts, nContacts ); + Buffer* nNative + = BufferUtils::map( data->m_device, n ); + Buffer* offsetsNative + = BufferUtils::map( data->m_device, offsets ); + + { + BT_PROFILE("GPU classTestKernel/Kernel (batch generation?)"); + Buffer constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST ); + int4 cdata; + cdata.x = nContacts; + cdata.y = 0; + cdata.z = staticIdx; + + int numWorkItems = 64*N_SPLIT*N_SPLIT; +#ifdef BATCH_DEBUG + SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; + adl::Buffer gpuDebugInfo(data->m_device,numWorkItems); + memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); + gpuDebugInfo.write(debugInfo,numWorkItems); +#endif + + + BufferInfo bInfo[] = { + BufferInfo( contactNative ), + BufferInfo( data->m_contactBuffer ), + BufferInfo( nNative ), + BufferInfo( offsetsNative ) +#ifdef BATCH_DEBUG + , BufferInfo(&gpuDebugInfo) +#endif + }; + + + + Launcher launcher( data->m_device, data->m_batchingKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( numWorkItems, 64 ); + DeviceUtils::waitForCompletion( data->m_device ); + +#ifdef BATCH_DEBUG + aaaa + Contact4* hostContacts = new Contact4[nContacts]; + data->m_contactBuffer->read(hostContacts,nContacts); + DeviceUtils::waitForCompletion( data->m_device ); + + gpuDebugInfo.read(debugInfo,numWorkItems); + DeviceUtils::waitForCompletion( data->m_device ); + + for (int i=0;i0) + { + printf("catch\n"); + } + if (debugInfo[i].m_valInt2>0) + { + printf("catch22\n"); + } + + if (debugInfo[i].m_valInt3>0) + { + printf("catch666\n"); + } + + if (debugInfo[i].m_valInt4>0) + { + printf("catch777\n"); + } + } + delete[] debugInfo; +#endif //BATCH_DEBUG + + } + + if(0) + { + u32* nhost = new u32[N_SPLIT*N_SPLIT]; + + nNative->read( nhost, N_SPLIT*N_SPLIT ); + + Contact4* chost = new Contact4[nContacts]; + data->m_contactBuffer->read( chost, nContacts ); + DeviceUtils::waitForCompletion( 
data->m_device ); + printf(">>"); + int nonzero = 0; + u32 maxn = 0; + for(int i=0; iwrite( *data->m_contactBuffer, nContacts ); + DeviceUtils::waitForCompletion( data->m_device ); + + if(0) + { + DeviceUtils::Config dhCfg; + Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg ); + { + HostBuffer host( deviceHost, nContacts ); + contactNative->read( host.m_ptr, nContacts ); + DeviceUtils::waitForCompletion( data->m_device ); + + for(int i=0; i( contactNative, contacts ); + BufferUtils::unmap( nNative, n ); + BufferUtils::unmap( offsetsNative, offsets ); +} + +#undef PATH +#undef KERNEL1 +#undef KERNEL2 + +#undef KERNEL3 +#undef KERNEL4 +#undef KERNEL5 diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/SolverHost.inl b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/SolverHost.inl new file mode 100644 index 000000000..a79205d8c --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/SolverHost.inl @@ -0,0 +1,848 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +class SolverInl +{ +public: + typedef SolverBase::ConstraintData ConstraintData; + + + static + __forceinline + void setLinearAndAngular(const MYF4& n, const MYF4& r0, const MYF4& r1, + MYF4& linear, MYF4& angular0, MYF4& angular1) + { + linear = -n; + angular0 = -cross3(r0, n); + angular1 = cross3(r1, n); + } + + static + __forceinline + float calcJacCoeff(const MYF4& linear0, const MYF4& linear1, const MYF4& angular0, const MYF4& angular1, + float invMass0, const Matrix3x3& invInertia0, float invMass1, const Matrix3x3& invInertia1) + { + // linear0,1 are normlized + float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; + float jmj1 = dot3F4(mtMul3(angular0,invInertia0), angular0); + float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; + float jmj3 = dot3F4(mtMul3(angular1,invInertia1), angular1); + return -1.f/(jmj0+jmj1+jmj2+jmj3); + } + static + __forceinline + float calcRelVel(const MYF4& l0, const MYF4& l1, const MYF4& a0, const MYF4& a1, + const MYF4& linVel0, const MYF4& angVel0, const MYF4& linVel1, const MYF4& angVel1) + { + return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); + } + + static + __forceinline + void setConstraint4( const MYF4& posA, const MYF4& linVelA, const MYF4& angVelA, float invMassA, const Matrix3x3& invInertiaA, + const MYF4& posB, const MYF4& linVelB, const MYF4& angVelB, float invMassB, const Matrix3x3& invInertiaB, + const Contact4& src, const SolverBase::ConstraintCfg& cfg, + Constraint4& dstC ) + { + dstC.m_bodyA = (u32)src.m_bodyAPtr; + dstC.m_bodyB = (u32)src.m_bodyBPtr; + + float dtInv = 1.f/cfg.m_dt; + for(int ic=0; ic<4; ic++) + { + dstC.m_appliedRambdaDt[ic] = 0.f; + } + dstC.m_fJacCoeffInv[0] = dstC.m_fJacCoeffInv[1] = 0.f; + + + const MYF4& n = src.m_worldNormal; + dstC.m_linear = -n; + dstC.setFrictionCoeff( src.getFrictionCoeff() ); + for(int ic=0; ic<4; ic++) + { + MYF4 r0 = src.m_worldPos[ic] - posA; + MYF4 r1 = 
src.m_worldPos[ic] - posB; + + if( ic >= src.getNPoints() ) + { + dstC.m_jacCoeffInv[ic] = 0.f; + continue; + } + + float relVelN; + { + MYF4 linear, angular0, angular1; + setLinearAndAngular(n, r0, r1, linear, angular0, angular1); + + dstC.m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, + invMassA, invInertiaA, invMassB, invInertiaB ); + + relVelN = calcRelVel(linear, -linear, angular0, angular1, + linVelA, angVelA, linVelB, angVelB); + + float e = src.getRestituitionCoeff(); + if( relVelN*relVelN < 0.004f ) e = 0.f; + + dstC.m_b[ic] = e*relVelN; + dstC.m_b[ic] += (src.getPenetration(ic) + cfg.m_positionDrift)*cfg.m_positionConstraintCoeff*dtInv; + dstC.m_appliedRambdaDt[ic] = 0.f; + } + } + + if( src.getNPoints() > 1 ) + { // prepare friction + MYF4 center = MAKE_MYF4(0.f); + for(int i=0; i 0.95f || (invMassA == 0.f || invMassB == 0.f)) + { + float angNA = dot3F4( n, angVelA ); + float angNB = dot3F4( n, angVelB ); + + angVelA -= (angNA*0.1f)*n; + angVelB -= (angNB*0.1f)*n; + } + } + } + + template + static + __inline + void solveContact(Constraint4& cs, + const MYF4& posA, MYF4& linVelA, MYF4& angVelA, float invMassA, const Matrix3x3& invInertiaA, + const MYF4& posB, MYF4& linVelB, MYF4& angVelB, float invMassB, const Matrix3x3& invInertiaB, + float maxRambdaDt[4], float minRambdaDt[4]) + { + MYF4 dLinVelA = MAKE_MYF4(0.f); + MYF4 dAngVelA = MAKE_MYF4(0.f); + MYF4 dLinVelB = MAKE_MYF4(0.f); + MYF4 dAngVelB = MAKE_MYF4(0.f); + + for(int ic=0; ic<4; ic++) + { + // dont necessary because this makes change to 0 + if( cs.m_jacCoeffInv[ic] == 0.f ) continue; + + { + MYF4 angular0, angular1, linear; + MYF4 r0 = cs.m_worldPos[ic] - posA; + MYF4 r1 = cs.m_worldPos[ic] - posB; + setLinearAndAngular( -cs.m_linear, r0, r1, linear, angular0, angular1 ); + + float rambdaDt = calcRelVel(cs.m_linear, -cs.m_linear, angular0, angular1, + linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic]; + rambdaDt *= cs.m_jacCoeffInv[ic]; + + { + float prevSum = 
cs.m_appliedRambdaDt[ic]; + float updated = prevSum; + updated += rambdaDt; + updated = max2( updated, minRambdaDt[ic] ); + updated = min2( updated, maxRambdaDt[ic] ); + rambdaDt = updated - prevSum; + cs.m_appliedRambdaDt[ic] = updated; + } + + MYF4 linImp0 = invMassA*linear*rambdaDt; + MYF4 linImp1 = invMassB*(-linear)*rambdaDt; + MYF4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; + MYF4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; + + if( JACOBI ) + { + dLinVelA += linImp0; + dAngVelA += angImp0; + dLinVelB += linImp1; + dAngVelB += angImp1; + } + else + { + linVelA += linImp0; + angVelA += angImp0; + linVelB += linImp1; + angVelB += angImp1; + } + } + } + + if( JACOBI ) + { + linVelA += dLinVelA; + angVelA += dAngVelA; + linVelB += dLinVelB; + angVelB += dAngVelB; + } + } + + enum + { + N_SPLIT = SolverBase::N_SPLIT, + }; + + // for parallel solve + struct ParallelSolveData + { + u32 m_n[N_SPLIT*N_SPLIT]; + u32 m_offset[N_SPLIT*N_SPLIT]; + }; + + static + __inline + int sortConstraintByBatch(Contact4* cs, int n, int ignoreIdx, int simdWidth = -1) + { + SortData* sortData; + { + BT_PROFILE("new"); + sortData = new SortData[n]; + } + + u32* idxBuffer = new u32[n]; + u32* idxSrc = idxBuffer; + u32* idxDst = idxBuffer; + int nIdxSrc, nIdxDst; + + const int N_FLG = 256; + const int FLG_MASK = N_FLG-1; + u32 flg[N_FLG/32]; +#if defined(_DEBUG) + for(int i=0; i sortBuffer; sortBuffer.setRawPtr( deviceHost, sortData, n ); + RadixSort::Data* sort = RadixSort::allocate( deviceHost, n ); + + RadixSort::execute( sort, sortBuffer, n ); + + RadixSort::deallocate( sort ); + } + DeviceUtils::deallocate( deviceHost ); + } + + { + BT_PROFILE("reorder"); + // reorder + Contact4* old = new Contact4[n]; + memcpy( old, cs, sizeof(Contact4)*n); + for(int i=0; i* bodies, const Buffer* shapes, const Buffer* constraints, + int start, int nConstraints) + : m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ), + 
m_solveFriction( true ){} + + u16 getType(){ return 0; } + + void run(int tIdx) + { + HostBuffer& hBody = *(HostBuffer*)m_bodies; + HostBuffer& hShape = *(HostBuffer*)m_shapes; + HostBuffer& hc = *(HostBuffer*)m_constraints; + + for(int ic=0; ic( hc[i], bodyA.m_pos, (MYF4&)bodyA.m_linVel, (MYF4&)bodyA.m_angVel, bodyA.m_invMass, hShape[aIdx].m_invInertia, + bodyB.m_pos, (MYF4&)bodyB.m_linVel, (MYF4&)bodyB.m_angVel, bodyB.m_invMass, hShape[bIdx].m_invInertia, + maxRambdaDt, minRambdaDt ); + } + else + { + float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; + float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; + + float sum = 0; + for(int j=0; j<4; j++) + { + sum +=hc[i].m_appliedRambdaDt[j]; + } + frictionCoeff = 0.7f; + for(int j=0; j<4; j++) + { + maxRambdaDt[j] = frictionCoeff*sum; + minRambdaDt[j] = -maxRambdaDt[j]; + } + + SolverInl::solveFriction( hc[i], bodyA.m_pos, (MYF4&)bodyA.m_linVel, (MYF4&)bodyA.m_angVel, bodyA.m_invMass, hShape[aIdx].m_invInertia, + bodyB.m_pos, (MYF4&)bodyB.m_linVel, (MYF4&)bodyB.m_angVel, bodyB.m_invMass, hShape[bIdx].m_invInertia, + maxRambdaDt, minRambdaDt ); + } + } + } + + const Buffer* m_bodies; + const Buffer* m_shapes; + const Buffer* m_constraints; + int m_start; + int m_nConstraints; + bool m_solveFriction; +}; + + +template<> +static Solver::Data* Solver::allocate( const Device* device, int pairCapacity ) +{ + Solver::Data* data = new Data; + data->m_device = device; + data->m_parallelSolveData = 0; + + return data; +} + +template<> +static void Solver::deallocate( Solver::Data* data ) +{ + if( data->m_parallelSolveData ) delete (SolverInl::ParallelSolveData*)data->m_parallelSolveData; + delete data; +} + + +void sortContacts2( Solver::Data* data, const Buffer* bodyBuf, + Buffer* contactsIn, void* additionalData, + int nContacts, const Solver::ConstraintCfg& cfg ) +{ + ADLASSERT( data->m_device->m_type == TYPE_HOST ); + HostBuffer* bodyNative + = (HostBuffer*)BufferUtils::map( data->m_device, bodyBuf ); + HostBuffer* 
contactNative + = (HostBuffer*)BufferUtils::map( data->m_device, contactsIn); + + if( cfg.m_enableParallelSolve ) + { + ADLASSERT( data->m_parallelSolveData == 0 ); + data->m_parallelSolveData = new SolverInl::ParallelSolveData; + SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData; + + HostBuffer sortData( data->m_device, nContacts ); + { // 2. set cell idx + float spacing = adl::SolverBase::N_OBJ_PER_SPLIT*cfg.m_averageExtent; + float xScale = 1.f/spacing; + for(int i=0; i= 0 && xIdx < adl::SolverBase::N_SPLIT ); + ADLASSERT( zIdx >= 0 && zIdx < adl::SolverBase::N_SPLIT ); + sortData[i].m_key = (xIdx+zIdx*adl::SolverBase::N_SPLIT); + sortData[i].m_value = i; + } + } + + { // 3. sort by cell idx + RadixSort::Data* sData = RadixSort::allocate( data->m_device, nContacts ); + + RadixSort::execute( sData, sortData, nContacts ); + + RadixSort::deallocate( sData ); + } + + { // 4. find entries + HostBuffer counts; counts.setRawPtr( data->m_device, solveData->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + HostBuffer offsets; offsets.setRawPtr( data->m_device, solveData->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + { + BoundSearch::Data* sData = BoundSearch::allocate( data->m_device ); + PrefixScan::Data* pData = PrefixScan::allocate( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + + BoundSearch::execute( sData, sortData, nContacts, counts, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT, BoundSearchBase::COUNT ); + + PrefixScan::execute( pData, counts, offsets, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + + BoundSearch::deallocate( sData ); + PrefixScan::deallocate( pData ); + } +#if defined(_DEBUG) + { + HostBuffer n0( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + HostBuffer offset0( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + for(int i=0; im_ptr, sizeof(Contact4)*nContacts ); + for(int i=0; i( bodyNative, 
bodyBuf ); + BufferUtils::unmap( contactNative, contactsIn ); +} + +static void reorderConvertToConstraints2( Solver::Data* data, const Buffer* bodyBuf, + const Buffer* shapeBuf, + adl::Buffer* contactsIn, SolverData contactCOut, void* additionalData, + int nContacts, const Solver::ConstraintCfg& cfg ) +{ + + + sortContacts2( data, bodyBuf, contactsIn, additionalData, nContacts, cfg ); + + { + SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData; + Buffer n; n.setRawPtr( data->m_device, solveData->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + Buffer offsets; offsets.setRawPtr( data->m_device, solveData->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + Solver::batchContacts( data, contactsIn, nContacts, &n, &offsets, cfg.m_staticIdx ); + printf("hello\n"); + } + + Solver::convertToConstraints( data, bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg ); +} + +template +static void solveContactConstraint( Solver::Data* data, const Buffer* bodyBuf, const Buffer* shapeBuf, + SolverData constraint, void* additionalData, int n ) +{ + + Buffer* bodyNative + = BufferUtils::map( data->m_device, bodyBuf ); + Buffer* shapeNative + = BufferUtils::map( data->m_device, shapeBuf ); + Buffer* constraintNative + = BufferUtils::map( data->m_device, (const Buffer*)constraint ); + + for(int iter=0; iterm_nIterations; iter++) + { + SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ); + task.m_solveFriction = false; + task.run(0); + } + + for(int iter=0; iterm_nIterations; iter++) + { + SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ); + task.m_solveFriction = true; + task.run(0); + } + + BufferUtils::unmap( bodyNative, bodyBuf ); + BufferUtils::unmap( shapeNative, shapeBuf ); + BufferUtils::unmap( constraintNative, (const Buffer*)constraint ); +} + +#if 0 +static +int createSolveTasks( int batchIdx, Data* data, const Buffer* bodyBuf, const Buffer* shapeBuf, 
+ SolverData constraint, int n, ThreadPool::Task* tasksOut[], int taskCapacity ) +{ +/* + ADLASSERT( (N_SPLIT&1) == 0 ); + ADLASSERT( batchIdx < N_BATCHES ); + ADLASSERT( data->m_device->m_type == TYPE_HOST ); + ADLASSERT( data->m_parallelSolveData ); + + SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData; + data->m_batchIdx = 0; + + const int nx = N_SPLIT/2; + + int nTasksCreated = 0; + +// for(int ii=0; ii<2; ii++) + for(batchIdx=0; batchIdx<4; batchIdx++) + { + int2 offset = make_int2( batchIdx&1, batchIdx>>1 ); + for(int ix=0; ixm_n[cellIdx]; + int start = solveData->m_offset[cellIdx]; + + if( n == 0 ) continue; + + SolveTask* task = new SolveTask( bodyBuf, shapeBuf, (const Buffer*)constraint, start, n ); +// task->m_solveFriction = (ii==0)? false:true; + tasksOut[nTasksCreated++] = task; + } + } + + return nTasksCreated; +*/ + ADLASSERT(0); + return 0; +} +#endif + + + +static void convertToConstraints2( Solver::Data* data, const Buffer* bodyBuf, + const Buffer* shapeBuf, + Buffer* contactsIn, SolverData contactCOut, void* additionalData, + int nContacts, const Solver::ConstraintCfg& cfg ) +{ + ADLASSERT( data->m_device->m_type == TYPE_HOST ); + + HostBuffer* bodyNative + = (HostBuffer*)BufferUtils::map( data->m_device, bodyBuf ); + HostBuffer* shapeNative + = (HostBuffer*)BufferUtils::map( data->m_device, shapeBuf ); + HostBuffer* contactNative + = (HostBuffer*)BufferUtils::map( data->m_device, contactsIn ); + HostBuffer* constraintNative + = (HostBuffer*)BufferUtils::map( data->m_device, (Buffer*)contactCOut ); + + { +#if !defined(_DEBUG) +#pragma omp parallel for +#endif + for(int i=0; i( bodyNative, bodyBuf ); + BufferUtils::unmap( shapeNative, shapeBuf ); + BufferUtils::unmap( contactNative, contactsIn ); + BufferUtils::unmap( constraintNative, (Buffer*)contactCOut ); +} + + + + + +static void batchContacts2( Solver::Data* data, Buffer* contacts, int nContacts, Buffer* n, Buffer* offsets, int staticIdx ) +{ 
+ ADLASSERT( data->m_device->m_type == TYPE_HOST ); + + HostBuffer* contactNative =0; + HostBuffer* nNative =0; + HostBuffer* offsetsNative =0; + + int sz = sizeof(Contact4); + int sz2 = sizeof(int2); + { + BT_PROFILE("BufferUtils::map"); + contactNative = (HostBuffer*)BufferUtils::map( data->m_device, contacts, nContacts ); + } + { + BT_PROFILE("BufferUtils::map2"); + nNative = (HostBuffer*)BufferUtils::map( data->m_device, n ); + offsetsNative= (HostBuffer*)BufferUtils::map( data->m_device, offsets ); + } + + + { + BT_PROFILE("sortConstraintByBatch"); + int numNonzeroGrid=0; + int maxNumBatches = 0; + + for(int i=0; im_ptr+offset, n, staticIdx,-1 ); // on GPU + maxNumBatches = max(numBatches,maxNumBatches); + + // SolverInl::sortConstraintByBatch( contactNative->m_ptr+offset, n, staticIdx ); // on CPU + } + } + + printf("maxNumBatches = %d\n", maxNumBatches); + } + + { + BT_PROFILE("BufferUtils::unmap"); + BufferUtils::unmap( contactNative, contacts, nContacts ); + } + { + BT_PROFILE("BufferUtils::unmap2"); + BufferUtils::unmap( nNative, n ); + BufferUtils::unmap( offsetsNative, offsets ); + } + + +} + + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/SolverKernels.cl b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/SolverKernels.cl new file mode 100644 index 000000000..e46194391 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/SolverKernels.cl @@ -0,0 +1,1051 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable + + +#ifdef cl_ext_atomic_counters_32 +#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable +#else +#define counter32_t volatile global int* +#endif + +typedef unsigned int u32; +typedef unsigned short u16; +typedef unsigned char u8; + +#define GET_GROUP_IDX get_group_id(0) +#define GET_LOCAL_IDX get_local_id(0) +#define GET_GLOBAL_IDX get_global_id(0) +#define GET_GROUP_SIZE get_local_size(0) +#define GET_NUM_GROUPS get_num_groups(0) +#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) +#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) +#define AtomInc(x) atom_inc(&(x)) +#define AtomInc1(x, out) out = atom_inc(&(x)) +#define AppendInc(x, out) out = atomic_inc(x) +#define AtomAdd(x, value) atom_add(&(x), value) +#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) +#define AtomXhg(x, value) atom_xchg ( &(x), value ) + + +#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) + +#define make_float4 (float4) +#define make_float2 (float2) +#define make_uint4 (uint4) +#define make_int4 (int4) +#define make_uint2 (uint2) +#define make_int2 (int2) + + +#define max2 max +#define min2 min + + +/////////////////////////////////////// +// Vector +/////////////////////////////////////// +__inline +float fastDiv(float numerator, float denominator) +{ 
+ return native_divide(numerator, denominator); +// return numerator/denominator; +} + +__inline +float4 fastDiv4(float4 numerator, float4 denominator) +{ + return native_divide(numerator, denominator); +} + +__inline +float fastSqrtf(float f2) +{ + return native_sqrt(f2); +// return sqrt(f2); +} + +__inline +float fastRSqrt(float f2) +{ + return native_rsqrt(f2); +} + +__inline +float fastLength4(float4 v) +{ + return fast_length(v); +} + +__inline +float4 fastNormalize4(float4 v) +{ + return fast_normalize(v); +} + + +__inline +float sqrtf(float a) +{ +// return sqrt(a); + return native_sqrt(a); +} + +__inline +float4 cross3(float4 a, float4 b) +{ + return cross(a,b); +} + +__inline +float dot3F4(float4 a, float4 b) +{ + float4 a1 = make_float4(a.xyz,0.f); + float4 b1 = make_float4(b.xyz,0.f); + return dot(a1, b1); +} + +__inline +float length3(const float4 a) +{ + return sqrtf(dot3F4(a,a)); +} + +__inline +float dot4(const float4 a, const float4 b) +{ + return dot( a, b ); +} + +// for height +__inline +float dot3w1(const float4 point, const float4 eqn) +{ + return dot3F4(point,eqn) + eqn.w; +} + +__inline +float4 normalize3(const float4 a) +{ + float4 n = make_float4(a.x, a.y, a.z, 0.f); + return fastNormalize4( n ); +// float length = sqrtf(dot3F4(a, a)); +// return 1.f/length * a; +} + +__inline +float4 normalize4(const float4 a) +{ + float length = sqrtf(dot4(a, a)); + return 1.f/length * a; +} + +__inline +float4 createEquation(const float4 a, const float4 b, const float4 c) +{ + float4 eqn; + float4 ab = b-a; + float4 ac = c-a; + eqn = normalize3( cross3(ab, ac) ); + eqn.w = -dot3F4(eqn,a); + return eqn; +} + +/////////////////////////////////////// +// Matrix3x3 +/////////////////////////////////////// + +typedef struct +{ + float4 m_row[3]; +}Matrix3x3; + +__inline +Matrix3x3 mtZero(); + +__inline +Matrix3x3 mtIdentity(); + +__inline +Matrix3x3 mtTranspose(Matrix3x3 m); + +__inline +Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b); + +__inline +float4 
mtMul1(Matrix3x3 a, float4 b); + +__inline +float4 mtMul3(float4 a, Matrix3x3 b); + +__inline +Matrix3x3 mtZero() +{ + Matrix3x3 m; + m.m_row[0] = (float4)(0.f); + m.m_row[1] = (float4)(0.f); + m.m_row[2] = (float4)(0.f); + return m; +} + +__inline +Matrix3x3 mtIdentity() +{ + Matrix3x3 m; + m.m_row[0] = (float4)(1,0,0,0); + m.m_row[1] = (float4)(0,1,0,0); + m.m_row[2] = (float4)(0,0,1,0); + return m; +} + +__inline +Matrix3x3 mtTranspose(Matrix3x3 m) +{ + Matrix3x3 out; + out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); + out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); + out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); + return out; +} + +__inline +Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) +{ + Matrix3x3 transB; + transB = mtTranspose( b ); + Matrix3x3 ans; + // why this doesn't run when 0ing in the for{} + a.m_row[0].w = 0.f; + a.m_row[1].w = 0.f; + a.m_row[2].w = 0.f; + for(int i=0; i<3; i++) + { +// a.m_row[i].w = 0.f; + ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); + ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); + ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); + ans.m_row[i].w = 0.f; + } + return ans; +} + +__inline +float4 mtMul1(Matrix3x3 a, float4 b) +{ + float4 ans; + ans.x = dot3F4( a.m_row[0], b ); + ans.y = dot3F4( a.m_row[1], b ); + ans.z = dot3F4( a.m_row[2], b ); + ans.w = 0.f; + return ans; +} + +__inline +float4 mtMul3(float4 a, Matrix3x3 b) +{ + float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); + float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); + float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); + + float4 ans; + ans.x = dot3F4( a, colx ); + ans.y = dot3F4( a, coly ); + ans.z = dot3F4( a, colz ); + return ans; +} + +/////////////////////////////////////// +// Quaternion +/////////////////////////////////////// + +typedef float4 Quaternion; + +__inline +Quaternion qtMul(Quaternion a, Quaternion 
b); + +__inline +Quaternion qtNormalize(Quaternion in); + +__inline +float4 qtRotate(Quaternion q, float4 vec); + +__inline +Quaternion qtInvert(Quaternion q); + +__inline +Matrix3x3 qtGetRotationMatrix(Quaternion q); + + + +__inline +Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtNormalize(Quaternion in) +{ + return fastNormalize4(in); +// in /= length( in ); +// return in; +} +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} + +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + +__inline +Matrix3x3 qtGetRotationMatrix(Quaternion quat) +{ + float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f); + Matrix3x3 out; + + out.m_row[0].x=1-2*quat2.y-2*quat2.z; + out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z; + out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y; + out.m_row[0].w = 0.f; + + out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z; + out.m_row[1].y=1-2*quat2.x-2*quat2.z; + out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x; + out.m_row[1].w = 0.f; + + out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y; + out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x; + out.m_row[2].z=1-2*quat2.x-2*quat2.y; + out.m_row[2].w = 0.f; + + return out; +} + + + + +#define WG_SIZE 64 + +typedef struct +{ + float4 m_pos; + Quaternion m_quat; + float4 m_linVel; + float4 m_angVel; + + u32 m_shapeIdx; + u32 m_shapeType; + float m_invMass; + float m_restituitionCoeff; + float m_frictionCoeff; +} Body; + +typedef struct +{ + Matrix3x3 m_invInertia; + Matrix3x3 m_initInvInertia; +} Shape; + +typedef struct +{ + float4 
m_linear; + float4 m_worldPos[4]; + float4 m_center; + float m_jacCoeffInv[4]; + float m_b[4]; + float m_appliedRambdaDt[4]; + + float m_fJacCoeffInv[2]; + float m_fAppliedRambdaDt[2]; + + u32 m_bodyA; + u32 m_bodyB; + + int m_batchIdx; + u32 m_paddings[1]; +} Constraint4; + +typedef struct +{ + float4 m_worldPos[4]; + float4 m_worldNormal; + u32 m_coeffs; + int m_batchIdx; + + u32 m_bodyAPtr; + u32 m_bodyBPtr; +} Contact4; + +typedef struct +{ + int m_nConstraints; + int m_start; + int m_batchIdx; + int m_nSplit; +// int m_paddings[1]; +} ConstBuffer; + +typedef struct +{ + int m_solveFriction; + int m_maxBatch; // long batch really kills the performance + int m_batchIdx; + int m_nSplit; +// int m_paddings[1]; +} ConstBufferBatchSolve; + + +void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) +{ + *linear = -n; + *angular0 = -cross3(r0, n); + *angular1 = cross3(r1, n); +} + + +float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ) +{ + return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); +} + + +float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, + float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1) +{ + // linear0,1 are normlized + float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; + float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0); + float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; + float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1); + return -1.f/(jmj0+jmj1+jmj2+jmj3); +} + + + +void solveContact(__global Constraint4* cs, + float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, + float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB) +{ + float minRambdaDt = 0; + float maxRambdaDt = FLT_MAX; + + 
for(int ic=0; ic<4; ic++) + { + if( cs->m_jacCoeffInv[ic] == 0.f ) continue; + + float4 angular0, angular1, linear; + float4 r0 = cs->m_worldPos[ic] - posA; + float4 r1 = cs->m_worldPos[ic] - posB; + setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); + + float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, + *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic]; + rambdaDt *= cs->m_jacCoeffInv[ic]; + + { + float prevSum = cs->m_appliedRambdaDt[ic]; + float updated = prevSum; + updated += rambdaDt; + updated = max2( updated, minRambdaDt ); + updated = min2( updated, maxRambdaDt ); + rambdaDt = updated - prevSum; + cs->m_appliedRambdaDt[ic] = updated; + } + + float4 linImp0 = invMassA*linear*rambdaDt; + float4 linImp1 = invMassB*(-linear)*rambdaDt; + float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; + float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; + + *linVelA += linImp0; + *angVelA += angImp0; + *linVelB += linImp1; + *angVelB += angImp1; + } +} + + +void solveFriction(__global Constraint4* cs, + float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, + float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB, + float maxRambdaDt[4], float minRambdaDt[4]) +{ + if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return; + const float4 center = cs->m_center; + + float4 n = -cs->m_linear; + + float4 tangent[2]; + tangent[0] = cross3( n, cs->m_worldPos[0]-center ); + tangent[1] = cross3( tangent[0], n ); + tangent[0] = normalize3( tangent[0] ); + tangent[1] = normalize3( tangent[1] ); + + float4 angular0, angular1, linear; + float4 r0 = center - posA; + float4 r1 = center - posB; + for(int i=0; i<2; i++) + { + setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); + float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, + *linVelA, *angVelA, *linVelB, *angVelB ); + rambdaDt *= cs->m_fJacCoeffInv[i]; + + { 
+ float prevSum = cs->m_fAppliedRambdaDt[i]; + float updated = prevSum; + updated += rambdaDt; + updated = max2( updated, minRambdaDt[i] ); + updated = min2( updated, maxRambdaDt[i] ); + rambdaDt = updated - prevSum; + cs->m_fAppliedRambdaDt[i] = updated; + } + + float4 linImp0 = invMassA*linear*rambdaDt; + float4 linImp1 = invMassB*(-linear)*rambdaDt; + float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; + float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; + + *linVelA += linImp0; + *angVelA += angImp0; + *linVelB += linImp1; + *angVelB += angImp1; + } + { // angular damping for point constraint + float4 ab = normalize3( posB - posA ); + float4 ac = normalize3( center - posA ); + if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) + { + float angNA = dot3F4( n, *angVelA ); + float angNB = dot3F4( n, *angVelB ); + + *angVelA -= (angNA*0.1f)*n; + *angVelB -= (angNB*0.1f)*n; + } + } +} + +void solveAConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs) +{ + float frictionCoeff = ldsCs[0].m_linear.w; + int aIdx = ldsCs[0].m_bodyA; + int bIdx = ldsCs[0].m_bodyB; + + float4 posA = gBodies[aIdx].m_pos; + float4 linVelA = gBodies[aIdx].m_linVel; + float4 angVelA = gBodies[aIdx].m_angVel; + float invMassA = gBodies[aIdx].m_invMass; + Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; + + float4 posB = gBodies[bIdx].m_pos; + float4 linVelB = gBodies[bIdx].m_linVel; + float4 angVelB = gBodies[bIdx].m_angVel; + float invMassB = gBodies[bIdx].m_invMass; + Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; + + + { + solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, + posB, &linVelB, &angVelB, invMassB, invInertiaB ); + } + + { + float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; + float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; + + float sum = 0; + for(int j=0; j<4; j++) + { + sum +=ldsCs[0].m_appliedRambdaDt[j]; + } + frictionCoeff = 0.7f; + for(int j=0; j<4; j++) + { + maxRambdaDt[j] = 
frictionCoeff*sum; + minRambdaDt[j] = -maxRambdaDt[j]; + } + + solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, + posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt ); + } + + gBodies[aIdx].m_linVel = linVelA; + gBodies[aIdx].m_angVel = angVelA; + gBodies[bIdx].m_linVel = linVelB; + gBodies[bIdx].m_angVel = angVelB; +} + +void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs) +{ + float frictionCoeff = ldsCs[0].m_linear.w; + int aIdx = ldsCs[0].m_bodyA; + int bIdx = ldsCs[0].m_bodyB; + + float4 posA = gBodies[aIdx].m_pos; + float4 linVelA = gBodies[aIdx].m_linVel; + float4 angVelA = gBodies[aIdx].m_angVel; + float invMassA = gBodies[aIdx].m_invMass; + Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; + + float4 posB = gBodies[bIdx].m_pos; + float4 linVelB = gBodies[bIdx].m_linVel; + float4 angVelB = gBodies[bIdx].m_angVel; + float invMassB = gBodies[bIdx].m_invMass; + Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; + + solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, + posB, &linVelB, &angVelB, invMassB, invInertiaB ); + + gBodies[aIdx].m_linVel = linVelA; + gBodies[aIdx].m_angVel = angVelA; + gBodies[bIdx].m_linVel = linVelB; + gBodies[bIdx].m_angVel = angVelB; +} + +void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs) +{ + float frictionCoeff = ldsCs[0].m_linear.w; + int aIdx = ldsCs[0].m_bodyA; + int bIdx = ldsCs[0].m_bodyB; + + float4 posA = gBodies[aIdx].m_pos; + float4 linVelA = gBodies[aIdx].m_linVel; + float4 angVelA = gBodies[aIdx].m_angVel; + float invMassA = gBodies[aIdx].m_invMass; + Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; + + float4 posB = gBodies[bIdx].m_pos; + float4 linVelB = gBodies[bIdx].m_linVel; + float4 angVelB = gBodies[bIdx].m_angVel; + float invMassB = gBodies[bIdx].m_invMass; + Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; + + { + float 
maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; + float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; + + float sum = 0; + for(int j=0; j<4; j++) + { + sum +=ldsCs[0].m_appliedRambdaDt[j]; + } + frictionCoeff = 0.7f; + for(int j=0; j<4; j++) + { + maxRambdaDt[j] = frictionCoeff*sum; + minRambdaDt[j] = -maxRambdaDt[j]; + } + + solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, + posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt ); + } + + gBodies[aIdx].m_linVel = linVelA; + gBodies[aIdx].m_angVel = angVelA; + gBodies[bIdx].m_linVel = linVelB; + gBodies[bIdx].m_angVel = angVelB; +} + +typedef struct +{ + int m_valInt0; + int m_valInt1; + int m_valInt2; + int m_valInt3; + + float m_val0; + float m_val1; + float m_val2; + float m_val3; +} SolverDebugInfo; + + +__kernel +__attribute__((reqd_work_group_size(WG_SIZE,1,1))) +//void BatchSolveKernel(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraints, __global int* gN, __global int* gOffsets, __global SolverDebugInfo* debugInfo, ConstBufferBatchSolve cb) +void BatchSolveKernel(__global Body* gBodies, +__global Shape* gShapes, +__global Constraint4* gConstraints, +__global int* gN, +__global int* gOffsets, +ConstBufferBatchSolve cb) +{ + __local int ldsBatchIdx[WG_SIZE+1]; + + __local int ldsCurBatch; + __local int ldsNextBatch; + __local int ldsStart; + + int lIdx = GET_LOCAL_IDX; + int wgIdx = GET_GROUP_IDX; + + int gIdx = GET_GLOBAL_IDX; +// debugInfo[gIdx].m_valInt0 = gIdx; + //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE; + + const int solveFriction = cb.m_solveFriction; + const int maxBatch = cb.m_maxBatch; + const int bIdx = cb.m_batchIdx; + const int nSplit = cb.m_nSplit; + + int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1); + int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1); + int cellIdx = xIdx+yIdx*nSplit; + + if( gN[cellIdx] == 0 ) + return; + + const int start = gOffsets[cellIdx]; + const int end = start + gN[cellIdx]; + + + if( lIdx == 0 ) + { + 
ldsCurBatch = 0; + ldsNextBatch = 0; + ldsStart = start; + } + + + GROUP_LDS_BARRIER; + + int idx=ldsStart+lIdx; + while (ldsCurBatch < maxBatch) + { + for(; idxm_bodyA = src.m_bodyAPtr; + dstC->m_bodyB = src.m_bodyBPtr; + + float dtInv = 1.f/dt; + for(int ic=0; ic<4; ic++) + { + dstC->m_appliedRambdaDt[ic] = 0.f; + } + dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; + + + dstC->m_linear = -src.m_worldNormal; + dstC->m_linear.w = 0.7f ;//src.getFrictionCoeff() ); + for(int ic=0; ic<4; ic++) + { + float4 r0 = src.m_worldPos[ic] - posA; + float4 r1 = src.m_worldPos[ic] - posB; + + if( ic >= src.m_worldNormal.w )//npoints + { + dstC->m_jacCoeffInv[ic] = 0.f; + continue; + } + + float relVelN; + { + float4 linear, angular0, angular1; + setLinearAndAngular(src.m_worldNormal, r0, r1, &linear, &angular0, &angular1); + + dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, + invMassA, &invInertiaA, invMassB, &invInertiaB ); + + relVelN = calcRelVel(linear, -linear, angular0, angular1, + linVelA, angVelA, linVelB, angVelB); + + float e = 0.f;//src.getRestituitionCoeff(); + if( relVelN*relVelN < 0.004f ) e = 0.f; + + dstC->m_b[ic] = e*relVelN; + //float penetration = src.m_worldPos[ic].w; + dstC->m_b[ic] += (src.m_worldPos[ic].w + positionDrift)*positionConstraintCoeff*dtInv; + dstC->m_appliedRambdaDt[ic] = 0.f; + } + } + + if( src.m_worldNormal.w > 1 )//npoints + { // prepare friction + float4 center = make_float4(0.f); + for(int i=0; im_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, + invMassA, &invInertiaA, invMassB, &invInertiaB ); + dstC->m_fAppliedRambdaDt[i] = 0.f; + } + dstC->m_center = center; + } + else + { + // single point constraint + } + + for(int i=0; i<4; i++) + { + if( im_worldPos[i] = src.m_worldPos[i]; + } + else + { + dstC->m_worldPos[i] = make_float4(0.f); + } + } +} + +typedef struct +{ + int m_nContacts; + float m_dt; + float m_positionDrift; + float m_positionConstraintCoeff; +} ConstBufferCTC; + 
+__kernel +__attribute__((reqd_work_group_size(WG_SIZE,1,1))) +void ContactToConstraintKernel(__global Contact4* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, ConstBufferCTC cb) +{ + int gIdx = GET_GLOBAL_IDX; + int nContacts = cb.m_nContacts; + float dt = cb.m_dt; + float positionDrift = cb.m_positionDrift; + float positionConstraintCoeff = cb.m_positionConstraintCoeff; + + if( gIdx < nContacts ) + { + int aIdx = gContact[gIdx].m_bodyAPtr; + int bIdx = gContact[gIdx].m_bodyBPtr; + + float4 posA = gBodies[aIdx].m_pos; + float4 linVelA = gBodies[aIdx].m_linVel; + float4 angVelA = gBodies[aIdx].m_angVel; + float invMassA = gBodies[aIdx].m_invMass; + Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; + + float4 posB = gBodies[bIdx].m_pos; + float4 linVelB = gBodies[bIdx].m_linVel; + float4 angVelB = gBodies[bIdx].m_angVel; + float invMassB = gBodies[bIdx].m_invMass; + Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; + + Constraint4 cs; + + setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, + gContact[gIdx], dt, positionDrift, positionConstraintCoeff, + &cs ); + + cs.m_batchIdx = gContact[gIdx].m_batchIdx; + + gConstraintOut[gIdx] = cs; + } +} + +__kernel +__attribute__((reqd_work_group_size(WG_SIZE,1,1))) +void CopyConstraintKernel(__global Contact4* gIn, __global Contact4* gOut, int4 cb ) +{ + int gIdx = GET_GLOBAL_IDX; + if( gIdx < cb.x ) + { + gOut[gIdx] = gIn[gIdx]; + } +} \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/SolverKernels.h b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/SolverKernels.h new file mode 100644 index 000000000..c80a6ae05 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/SolverKernels.h @@ -0,0 +1,1037 @@ +static const char* solverKernelsCL= \ +"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +"#pragma OPENCL EXTENSION 
cl_khr_local_int32_base_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" +"\n" +"\n" +"#ifdef cl_ext_atomic_counters_32\n" +"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" +"#else\n" +"#define counter32_t volatile global int*\n" +"#endif\n" +"\n" +"typedef unsigned int u32;\n" +"typedef unsigned short u16;\n" +"typedef unsigned char u8;\n" +"\n" +"#define GET_GROUP_IDX get_group_id(0)\n" +"#define GET_LOCAL_IDX get_local_id(0)\n" +"#define GET_GLOBAL_IDX get_global_id(0)\n" +"#define GET_GROUP_SIZE get_local_size(0)\n" +"#define GET_NUM_GROUPS get_num_groups(0)\n" +"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +"#define AtomInc(x) atom_inc(&(x))\n" +"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +"#define AppendInc(x, out) out = atomic_inc(x)\n" +"#define AtomAdd(x, value) atom_add(&(x), value)\n" +"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" +"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" +"\n" +"\n" +"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" +"\n" +"#define make_float4 (float4)\n" +"#define make_float2 (float2)\n" +"#define make_uint4 (uint4)\n" +"#define make_int4 (int4)\n" +"#define make_uint2 (uint2)\n" +"#define make_int2 (int2)\n" +"\n" +"\n" +"#define max2 max\n" +"#define min2 min\n" +"\n" +"\n" +"///////////////////////////////////////\n" +"// Vector\n" +"///////////////////////////////////////\n" +"__inline\n" +"float fastDiv(float numerator, float denominator)\n" +"{\n" +" return native_divide(numerator, denominator); \n" +"// return numerator/denominator; \n" +"}\n" +"\n" +"__inline\n" +"float4 fastDiv4(float4 numerator, float4 denominator)\n" +"{\n" +" return native_divide(numerator, denominator); \n" +"}\n" 
+"\n" +"__inline\n" +"float fastSqrtf(float f2)\n" +"{\n" +" return native_sqrt(f2);\n" +"// return sqrt(f2);\n" +"}\n" +"\n" +"__inline\n" +"float fastRSqrt(float f2)\n" +"{\n" +" return native_rsqrt(f2);\n" +"}\n" +"\n" +"__inline\n" +"float fastLength4(float4 v)\n" +"{\n" +" return fast_length(v);\n" +"}\n" +"\n" +"__inline\n" +"float4 fastNormalize4(float4 v)\n" +"{\n" +" return fast_normalize(v);\n" +"}\n" +"\n" +"\n" +"__inline\n" +"float sqrtf(float a)\n" +"{\n" +"// return sqrt(a);\n" +" return native_sqrt(a);\n" +"}\n" +"\n" +"__inline\n" +"float4 cross3(float4 a, float4 b)\n" +"{\n" +" return cross(a,b);\n" +"}\n" +"\n" +"__inline\n" +"float dot3F4(float4 a, float4 b)\n" +"{\n" +" float4 a1 = make_float4(a.xyz,0.f);\n" +" float4 b1 = make_float4(b.xyz,0.f);\n" +" return dot(a1, b1);\n" +"}\n" +"\n" +"__inline\n" +"float length3(const float4 a)\n" +"{\n" +" return sqrtf(dot3F4(a,a));\n" +"}\n" +"\n" +"__inline\n" +"float dot4(const float4 a, const float4 b)\n" +"{\n" +" return dot( a, b );\n" +"}\n" +"\n" +"// for height\n" +"__inline\n" +"float dot3w1(const float4 point, const float4 eqn)\n" +"{\n" +" return dot3F4(point,eqn) + eqn.w;\n" +"}\n" +"\n" +"__inline\n" +"float4 normalize3(const float4 a)\n" +"{\n" +" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +" return fastNormalize4( n );\n" +"// float length = sqrtf(dot3F4(a, a));\n" +"// return 1.f/length * a;\n" +"}\n" +"\n" +"__inline\n" +"float4 normalize4(const float4 a)\n" +"{\n" +" float length = sqrtf(dot4(a, a));\n" +" return 1.f/length * a;\n" +"}\n" +"\n" +"__inline\n" +"float4 createEquation(const float4 a, const float4 b, const float4 c)\n" +"{\n" +" float4 eqn;\n" +" float4 ab = b-a;\n" +" float4 ac = c-a;\n" +" eqn = normalize3( cross3(ab, ac) );\n" +" eqn.w = -dot3F4(eqn,a);\n" +" return eqn;\n" +"}\n" +"\n" +"///////////////////////////////////////\n" +"// Matrix3x3\n" +"///////////////////////////////////////\n" +"\n" +"typedef struct\n" +"{\n" +" float4 m_row[3];\n" +"}Matrix3x3;\n" 
+"\n" +"__inline\n" +"Matrix3x3 mtZero();\n" +"\n" +"__inline\n" +"Matrix3x3 mtIdentity();\n" +"\n" +"__inline\n" +"Matrix3x3 mtTranspose(Matrix3x3 m);\n" +"\n" +"__inline\n" +"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" +"\n" +"__inline\n" +"float4 mtMul1(Matrix3x3 a, float4 b);\n" +"\n" +"__inline\n" +"float4 mtMul3(float4 a, Matrix3x3 b);\n" +"\n" +"__inline\n" +"Matrix3x3 mtZero()\n" +"{\n" +" Matrix3x3 m;\n" +" m.m_row[0] = (float4)(0.f);\n" +" m.m_row[1] = (float4)(0.f);\n" +" m.m_row[2] = (float4)(0.f);\n" +" return m;\n" +"}\n" +"\n" +"__inline\n" +"Matrix3x3 mtIdentity()\n" +"{\n" +" Matrix3x3 m;\n" +" m.m_row[0] = (float4)(1,0,0,0);\n" +" m.m_row[1] = (float4)(0,1,0,0);\n" +" m.m_row[2] = (float4)(0,0,1,0);\n" +" return m;\n" +"}\n" +"\n" +"__inline\n" +"Matrix3x3 mtTranspose(Matrix3x3 m)\n" +"{\n" +" Matrix3x3 out;\n" +" out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +" out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" return out;\n" +"}\n" +"\n" +"__inline\n" +"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" +"{\n" +" Matrix3x3 transB;\n" +" transB = mtTranspose( b );\n" +" Matrix3x3 ans;\n" +" // why this doesn't run when 0ing in the for{}\n" +" a.m_row[0].w = 0.f;\n" +" a.m_row[1].w = 0.f;\n" +" a.m_row[2].w = 0.f;\n" +" for(int i=0; i<3; i++)\n" +" {\n" +"// a.m_row[i].w = 0.f;\n" +" ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" +" ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" +" ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" +" ans.m_row[i].w = 0.f;\n" +" }\n" +" return ans;\n" +"}\n" +"\n" +"__inline\n" +"float4 mtMul1(Matrix3x3 a, float4 b)\n" +"{\n" +" float4 ans;\n" +" ans.x = dot3F4( a.m_row[0], b );\n" +" ans.y = dot3F4( a.m_row[1], b );\n" +" ans.z = dot3F4( a.m_row[2], b );\n" +" ans.w = 0.f;\n" +" return ans;\n" +"}\n" +"\n" +"__inline\n" +"float4 mtMul3(float4 a, Matrix3x3 b)\n" 
+"{\n" +" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +"\n" +" float4 ans;\n" +" ans.x = dot3F4( a, colx );\n" +" ans.y = dot3F4( a, coly );\n" +" ans.z = dot3F4( a, colz );\n" +" return ans;\n" +"}\n" +"\n" +"///////////////////////////////////////\n" +"// Quaternion\n" +"///////////////////////////////////////\n" +"\n" +"typedef float4 Quaternion;\n" +"\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b);\n" +"\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in);\n" +"\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec);\n" +"\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q);\n" +"\n" +"__inline\n" +"Matrix3x3 qtGetRotationMatrix(Quaternion q);\n" +"\n" +"\n" +"\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b)\n" +"{\n" +" Quaternion ans;\n" +" ans = cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in)\n" +"{\n" +" return fastNormalize4(in);\n" +"// in /= length( in );\n" +"// return in;\n" +"}\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec)\n" +"{\n" +" Quaternion qInv = qtInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q)\n" +"{\n" +" return (Quaternion)(-q.xyz, q.w);\n" +"}\n" +"\n" +"__inline\n" +"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +"{\n" +" return qtRotate( qtInvert( q ), vec );\n" +"}\n" +"\n" +"__inline\n" +"Matrix3x3 qtGetRotationMatrix(Quaternion quat)\n" +"{\n" +" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" Matrix3x3 out;\n" +"\n" +" 
out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +" out.m_row[0].w = 0.f;\n" +"\n" +" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +" out.m_row[1].w = 0.f;\n" +"\n" +" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +" out.m_row[2].w = 0.f;\n" +"\n" +" return out;\n" +"}\n" +"\n" +"\n" +"\n" +"\n" +"#define WG_SIZE 64\n" +"\n" +"typedef struct\n" +"{\n" +" float4 m_pos;\n" +" Quaternion m_quat;\n" +" float4 m_linVel;\n" +" float4 m_angVel;\n" +"\n" +" u32 m_shapeIdx;\n" +" u32 m_shapeType;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"} Body;\n" +"\n" +"typedef struct\n" +"{\n" +" Matrix3x3 m_invInertia;\n" +" Matrix3x3 m_initInvInertia;\n" +"} Shape;\n" +"\n" +"typedef struct\n" +"{\n" +" float4 m_linear;\n" +" float4 m_worldPos[4];\n" +" float4 m_center; \n" +" float m_jacCoeffInv[4];\n" +" float m_b[4];\n" +" float m_appliedRambdaDt[4];\n" +"\n" +" float m_fJacCoeffInv[2]; \n" +" float m_fAppliedRambdaDt[2]; \n" +"\n" +" u32 m_bodyA;\n" +" u32 m_bodyB;\n" +"\n" +" int m_batchIdx;\n" +" u32 m_paddings[1];\n" +"} Constraint4;\n" +"\n" +"typedef struct\n" +"{\n" +" float4 m_worldPos[4];\n" +" float4 m_worldNormal;\n" +" u32 m_coeffs;\n" +" int m_batchIdx;\n" +"\n" +" u32 m_bodyAPtr;\n" +" u32 m_bodyBPtr;\n" +"} Contact4;\n" +"\n" +"typedef struct\n" +"{\n" +" int m_nConstraints;\n" +" int m_start;\n" +" int m_batchIdx;\n" +" int m_nSplit;\n" +"// int m_paddings[1];\n" +"} ConstBuffer;\n" +"\n" +"typedef struct\n" +"{\n" +" int m_solveFriction;\n" +" int m_maxBatch; // long batch really kills the performance\n" +" int m_batchIdx;\n" +" int m_nSplit;\n" +"// int m_paddings[1];\n" +"} ConstBufferBatchSolve;\n" +"\n" +"\n" 
+"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" +"{\n" +" *linear = -n;\n" +" *angular0 = -cross3(r0, n);\n" +" *angular1 = cross3(r1, n);\n" +"}\n" +"\n" +"\n" +"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" +"{\n" +" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" +"}\n" +"\n" +"\n" +"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" +" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" +"{\n" +" // linear0,1 are normlized\n" +" float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" +" float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" +" float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" +" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" +" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" +"}\n" +"\n" +"\n" +"\n" +"void solveContact(__global Constraint4* cs,\n" +" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" +" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)\n" +"{\n" +" float minRambdaDt = 0;\n" +" float maxRambdaDt = FLT_MAX;\n" +"\n" +" for(int ic=0; ic<4; ic++)\n" +" {\n" +" if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" +"\n" +" float4 angular0, angular1, linear;\n" +" float4 r0 = cs->m_worldPos[ic] - posA;\n" +" float4 r1 = cs->m_worldPos[ic] - posB;\n" +" setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" +"\n" +" float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" +" *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n" +" rambdaDt *= cs->m_jacCoeffInv[ic];\n" +"\n" +" {\n" +" float prevSum = cs->m_appliedRambdaDt[ic];\n" +" float updated = prevSum;\n" +" updated += 
rambdaDt;\n" +" updated = max2( updated, minRambdaDt );\n" +" updated = min2( updated, maxRambdaDt );\n" +" rambdaDt = updated - prevSum;\n" +" cs->m_appliedRambdaDt[ic] = updated;\n" +" }\n" +"\n" +" float4 linImp0 = invMassA*linear*rambdaDt;\n" +" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" +" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" +" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" +"\n" +" *linVelA += linImp0;\n" +" *angVelA += angImp0;\n" +" *linVelB += linImp1;\n" +" *angVelB += angImp1;\n" +" }\n" +"}\n" +"\n" +"\n" +"void solveFriction(__global Constraint4* cs,\n" +" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" +" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,\n" +" float maxRambdaDt[4], float minRambdaDt[4])\n" +"{\n" +" if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" +" const float4 center = cs->m_center;\n" +"\n" +" float4 n = -cs->m_linear;\n" +"\n" +" float4 tangent[2];\n" +" tangent[0] = cross3( n, cs->m_worldPos[0]-center );\n" +" tangent[1] = cross3( tangent[0], n );\n" +" tangent[0] = normalize3( tangent[0] );\n" +" tangent[1] = normalize3( tangent[1] );\n" +"\n" +" float4 angular0, angular1, linear;\n" +" float4 r0 = center - posA;\n" +" float4 r1 = center - posB;\n" +" for(int i=0; i<2; i++)\n" +" {\n" +" setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" +" float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" +" *linVelA, *angVelA, *linVelB, *angVelB );\n" +" rambdaDt *= cs->m_fJacCoeffInv[i];\n" +"\n" +" {\n" +" float prevSum = cs->m_fAppliedRambdaDt[i];\n" +" float updated = prevSum;\n" +" updated += rambdaDt;\n" +" updated = max2( updated, minRambdaDt[i] );\n" +" updated = min2( updated, maxRambdaDt[i] );\n" +" rambdaDt = updated - prevSum;\n" +" cs->m_fAppliedRambdaDt[i] = updated;\n" +" }\n" +"\n" +" float4 linImp0 = invMassA*linear*rambdaDt;\n" +" float4 
linImp1 = invMassB*(-linear)*rambdaDt;\n" +" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" +" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" +"\n" +" *linVelA += linImp0;\n" +" *angVelA += angImp0;\n" +" *linVelB += linImp1;\n" +" *angVelB += angImp1;\n" +" }\n" +" { // angular damping for point constraint\n" +" float4 ab = normalize3( posB - posA );\n" +" float4 ac = normalize3( center - posA );\n" +" if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n" +" {\n" +" float angNA = dot3F4( n, *angVelA );\n" +" float angNB = dot3F4( n, *angVelB );\n" +"\n" +" *angVelA -= (angNA*0.1f)*n;\n" +" *angVelB -= (angNB*0.1f)*n;\n" +" }\n" +" }\n" +"}\n" +"\n" +"void solveAConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" +"{\n" +" float frictionCoeff = ldsCs[0].m_linear.w;\n" +" int aIdx = ldsCs[0].m_bodyA;\n" +" int bIdx = ldsCs[0].m_bodyB;\n" +"\n" +" float4 posA = gBodies[aIdx].m_pos;\n" +" float4 linVelA = gBodies[aIdx].m_linVel;\n" +" float4 angVelA = gBodies[aIdx].m_angVel;\n" +" float invMassA = gBodies[aIdx].m_invMass;\n" +" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" +"\n" +" float4 posB = gBodies[bIdx].m_pos;\n" +" float4 linVelB = gBodies[bIdx].m_linVel;\n" +" float4 angVelB = gBodies[bIdx].m_angVel;\n" +" float invMassB = gBodies[bIdx].m_invMass;\n" +" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" +" \n" +" \n" +" {\n" +" solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" +" posB, &linVelB, &angVelB, invMassB, invInertiaB );\n" +" }\n" +"\n" +" {\n" +" float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" +" float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" +"\n" +" float sum = 0;\n" +" for(int j=0; j<4; j++)\n" +" {\n" +" sum +=ldsCs[0].m_appliedRambdaDt[j];\n" +" }\n" +" frictionCoeff = 0.7f;\n" +" for(int j=0; j<4; j++)\n" +" {\n" +" maxRambdaDt[j] = frictionCoeff*sum;\n" +" minRambdaDt[j] = -maxRambdaDt[j];\n" +" }\n" +"\n" +" 
solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" +" posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" +" }\n" +"\n" +" gBodies[aIdx].m_linVel = linVelA;\n" +" gBodies[aIdx].m_angVel = angVelA;\n" +" gBodies[bIdx].m_linVel = linVelB;\n" +" gBodies[bIdx].m_angVel = angVelB;\n" +"}\n" +"\n" +"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" +"{\n" +" float frictionCoeff = ldsCs[0].m_linear.w;\n" +" int aIdx = ldsCs[0].m_bodyA;\n" +" int bIdx = ldsCs[0].m_bodyB;\n" +"\n" +" float4 posA = gBodies[aIdx].m_pos;\n" +" float4 linVelA = gBodies[aIdx].m_linVel;\n" +" float4 angVelA = gBodies[aIdx].m_angVel;\n" +" float invMassA = gBodies[aIdx].m_invMass;\n" +" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" +"\n" +" float4 posB = gBodies[bIdx].m_pos;\n" +" float4 linVelB = gBodies[bIdx].m_linVel;\n" +" float4 angVelB = gBodies[bIdx].m_angVel;\n" +" float invMassB = gBodies[bIdx].m_invMass;\n" +" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" +"\n" +" solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" +" posB, &linVelB, &angVelB, invMassB, invInertiaB );\n" +"\n" +" gBodies[aIdx].m_linVel = linVelA;\n" +" gBodies[aIdx].m_angVel = angVelA;\n" +" gBodies[bIdx].m_linVel = linVelB;\n" +" gBodies[bIdx].m_angVel = angVelB;\n" +"}\n" +"\n" +"void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" +"{\n" +" float frictionCoeff = ldsCs[0].m_linear.w;\n" +" int aIdx = ldsCs[0].m_bodyA;\n" +" int bIdx = ldsCs[0].m_bodyB;\n" +"\n" +" float4 posA = gBodies[aIdx].m_pos;\n" +" float4 linVelA = gBodies[aIdx].m_linVel;\n" +" float4 angVelA = gBodies[aIdx].m_angVel;\n" +" float invMassA = gBodies[aIdx].m_invMass;\n" +" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" +"\n" +" float4 posB = gBodies[bIdx].m_pos;\n" +" float4 linVelB = gBodies[bIdx].m_linVel;\n" +" float4 angVelB = 
gBodies[bIdx].m_angVel;\n" +" float invMassB = gBodies[bIdx].m_invMass;\n" +" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" +"\n" +" {\n" +" float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" +" float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" +"\n" +" float sum = 0;\n" +" for(int j=0; j<4; j++)\n" +" {\n" +" sum +=ldsCs[0].m_appliedRambdaDt[j];\n" +" }\n" +" frictionCoeff = 0.7f;\n" +" for(int j=0; j<4; j++)\n" +" {\n" +" maxRambdaDt[j] = frictionCoeff*sum;\n" +" minRambdaDt[j] = -maxRambdaDt[j];\n" +" }\n" +"\n" +" solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" +" posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" +" }\n" +"\n" +" gBodies[aIdx].m_linVel = linVelA;\n" +" gBodies[aIdx].m_angVel = angVelA;\n" +" gBodies[bIdx].m_linVel = linVelB;\n" +" gBodies[bIdx].m_angVel = angVelB;\n" +"}\n" +"\n" +"typedef struct \n" +"{\n" +" int m_valInt0;\n" +" int m_valInt1;\n" +" int m_valInt2;\n" +" int m_valInt3;\n" +"\n" +" float m_val0;\n" +" float m_val1;\n" +" float m_val2;\n" +" float m_val3;\n" +"} SolverDebugInfo;\n" +"\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" +"//void BatchSolveKernel(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraints, __global int* gN, __global int* gOffsets, __global SolverDebugInfo* debugInfo, ConstBufferBatchSolve cb)\n" +"void BatchSolveKernel(__global Body* gBodies, \n" +"__global Shape* gShapes, \n" +"__global Constraint4* gConstraints, \n" +"__global int* gN, \n" +"__global int* gOffsets, \n" +"ConstBufferBatchSolve cb)\n" +"{\n" +" __local int ldsBatchIdx[WG_SIZE+1];\n" +"\n" +" __local int ldsCurBatch;\n" +" __local int ldsNextBatch;\n" +" __local int ldsStart;\n" +"\n" +" int lIdx = GET_LOCAL_IDX;\n" +" int wgIdx = GET_GROUP_IDX;\n" +"\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"// debugInfo[gIdx].m_valInt0 = gIdx;\n" +" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" +"\n" +" const int solveFriction = 
cb.m_solveFriction;\n" +" const int maxBatch = cb.m_maxBatch;\n" +" const int bIdx = cb.m_batchIdx;\n" +" const int nSplit = cb.m_nSplit;\n" +"\n" +" int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n" +" int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n" +" int cellIdx = xIdx+yIdx*nSplit;\n" +" \n" +" if( gN[cellIdx] == 0 ) \n" +" return;\n" +"\n" +" const int start = gOffsets[cellIdx];\n" +" const int end = start + gN[cellIdx];\n" +"\n" +" \n" +" if( lIdx == 0 )\n" +" {\n" +" ldsCurBatch = 0;\n" +" ldsNextBatch = 0;\n" +" ldsStart = start;\n" +" }\n" +"\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" int idx=ldsStart+lIdx;\n" +" while (ldsCurBatch < maxBatch)\n" +" {\n" +" for(; idxm_bodyA = src.m_bodyAPtr;\n" +" dstC->m_bodyB = src.m_bodyBPtr;\n" +"\n" +" float dtInv = 1.f/dt;\n" +" for(int ic=0; ic<4; ic++)\n" +" {\n" +" dstC->m_appliedRambdaDt[ic] = 0.f;\n" +" }\n" +" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" +"\n" +"\n" +" dstC->m_linear = -src.m_worldNormal;\n" +" dstC->m_linear.w = 0.7f ;//src.getFrictionCoeff() );\n" +" for(int ic=0; ic<4; ic++)\n" +" {\n" +" float4 r0 = src.m_worldPos[ic] - posA;\n" +" float4 r1 = src.m_worldPos[ic] - posB;\n" +"\n" +" if( ic >= src.m_worldNormal.w )//npoints\n" +" {\n" +" dstC->m_jacCoeffInv[ic] = 0.f;\n" +" continue;\n" +" }\n" +"\n" +" float relVelN;\n" +" {\n" +" float4 linear, angular0, angular1;\n" +" setLinearAndAngular(src.m_worldNormal, r0, r1, &linear, &angular0, &angular1);\n" +"\n" +" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" +" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" +"\n" +" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" +" linVelA, angVelA, linVelB, angVelB);\n" +"\n" +" float e = 0.f;//src.getRestituitionCoeff();\n" +" if( relVelN*relVelN < 0.004f ) e = 0.f;\n" +"\n" +" dstC->m_b[ic] = e*relVelN;\n" +" //float penetration = src.m_worldPos[ic].w;\n" +" dstC->m_b[ic] += (src.m_worldPos[ic].w + 
positionDrift)*positionConstraintCoeff*dtInv;\n" +" dstC->m_appliedRambdaDt[ic] = 0.f;\n" +" }\n" +" }\n" +"\n" +" if( src.m_worldNormal.w > 1 )//npoints\n" +" { // prepare friction\n" +" float4 center = make_float4(0.f);\n" +" for(int i=0; im_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" +" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" +" dstC->m_fAppliedRambdaDt[i] = 0.f;\n" +" }\n" +" dstC->m_center = center;\n" +" }\n" +" else\n" +" {\n" +" // single point constraint\n" +" }\n" +"\n" +" for(int i=0; i<4; i++)\n" +" {\n" +" if( im_worldPos[i] = src.m_worldPos[i];\n" +" }\n" +" else\n" +" {\n" +" dstC->m_worldPos[i] = make_float4(0.f);\n" +" }\n" +" }\n" +"}\n" +"\n" +"typedef struct\n" +"{\n" +" int m_nContacts;\n" +" float m_dt;\n" +" float m_positionDrift;\n" +" float m_positionConstraintCoeff;\n" +"} ConstBufferCTC;\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" +"void ContactToConstraintKernel(__global Contact4* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, ConstBufferCTC cb)\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +" int nContacts = cb.m_nContacts;\n" +" float dt = cb.m_dt;\n" +" float positionDrift = cb.m_positionDrift;\n" +" float positionConstraintCoeff = cb.m_positionConstraintCoeff;\n" +"\n" +" if( gIdx < nContacts )\n" +" {\n" +" int aIdx = gContact[gIdx].m_bodyAPtr;\n" +" int bIdx = gContact[gIdx].m_bodyBPtr;\n" +"\n" +" float4 posA = gBodies[aIdx].m_pos;\n" +" float4 linVelA = gBodies[aIdx].m_linVel;\n" +" float4 angVelA = gBodies[aIdx].m_angVel;\n" +" float invMassA = gBodies[aIdx].m_invMass;\n" +" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" +"\n" +" float4 posB = gBodies[bIdx].m_pos;\n" +" float4 linVelB = gBodies[bIdx].m_linVel;\n" +" float4 angVelB = gBodies[bIdx].m_angVel;\n" +" float invMassB = gBodies[bIdx].m_invMass;\n" +" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" +"\n" +" Constraint4 cs;\n" +"\n" +" 
setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" +" gContact[gIdx], dt, positionDrift, positionConstraintCoeff, \n" +" &cs );\n" +" \n" +" cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" +"\n" +" gConstraintOut[gIdx] = cs;\n" +" }\n" +"}\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" +"void CopyConstraintKernel(__global Contact4* gIn, __global Contact4* gOut, int4 cb )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +" if( gIdx < cb.x )\n" +" {\n" +" gOut[gIdx] = gIn[gIdx];\n" +" }\n" +"}\n" +; diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/batchingKernels.cl b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/batchingKernels.cl new file mode 100644 index 000000000..eee80c1a3 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/batchingKernels.cl @@ -0,0 +1,338 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable + +#ifdef cl_ext_atomic_counters_32 +#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable +#else +#define counter32_t volatile __global int* +#endif + + +typedef unsigned int u32; +typedef unsigned short u16; +typedef unsigned char u8; + +#define GET_GROUP_IDX get_group_id(0) +#define GET_LOCAL_IDX get_local_id(0) +#define GET_GLOBAL_IDX get_global_id(0) +#define GET_GROUP_SIZE get_local_size(0) +#define GET_NUM_GROUPS get_num_groups(0) +#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) +#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) +#define AtomInc(x) atom_inc(&(x)) +#define AtomInc1(x, out) out = atom_inc(&(x)) +#define AppendInc(x, out) out = atomic_inc(x) +#define AtomAdd(x, value) atom_add(&(x), value) +#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) +#define AtomXhg(x, value) atom_xchg ( &(x), value ) + + +#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) + +#define make_float4 (float4) +#define make_float2 (float2) +#define make_uint4 (uint4) +#define make_int4 (int4) +#define make_uint2 (uint2) +#define make_int2 (int2) + + +#define max2 max +#define min2 min + + +#define WG_SIZE 64 + + + +typedef struct +{ + float4 m_worldPos[4]; + float4 m_worldNormal; + u32 m_coeffs; + int m_batchIdx; + + u32 m_bodyA; + u32 m_bodyB; +}Contact4; + +typedef struct +{ + int m_n; + int m_start; + int m_staticIdx; + int m_paddings[1]; +} ConstBuffer; + +typedef struct +{ + u32 m_a; + u32 m_b; + u32 m_idx; +}Elem; + +#define STACK_SIZE (WG_SIZE*10) +//#define STACK_SIZE (WG_SIZE) +#define RING_SIZE 1024 +#define RING_SIZE_MASK (RING_SIZE-1) +#define 
CHECK_SIZE (WG_SIZE) + + +#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd) +#define RING_END ldsTmp + +u32 readBuf(__local u32* buff, int idx) +{ + idx = idx % (32*CHECK_SIZE); + int bitIdx = idx%32; + int bufIdx = idx/32; + return buff[bufIdx] & (1<> bitIdx)&1) == 0; +} + +// batching on the GPU +__kernel void CreateBatches( __global Contact4* gConstraints, __global Contact4* gConstraintsOut, + __global u32* gN, __global u32* gStart, + ConstBuffer cb ) +{ + __local u32 ldsStackIdx[STACK_SIZE]; + __local u32 ldsStackEnd; + __local Elem ldsRingElem[RING_SIZE]; + __local u32 ldsRingEnd; + __local u32 ldsTmp; + __local u32 ldsCheckBuffer[CHECK_SIZE]; + __local u32 ldsFixedBuffer[CHECK_SIZE]; + __local u32 ldsGEnd; + __local u32 ldsDstEnd; + + int wgIdx = GET_GROUP_IDX; + int lIdx = GET_LOCAL_IDX; + + const int m_n = gN[wgIdx]; + const int m_start = gStart[wgIdx]; + const int m_staticIdx = cb.m_staticIdx; + + if( lIdx == 0 ) + { + ldsRingEnd = 0; + ldsGEnd = 0; + ldsStackEnd = 0; + ldsDstEnd = m_start; + } + +// while(1) + for(int ie=0; ie<250; ie++) + { + ldsFixedBuffer[lIdx] = 0; + + for(int giter=0; giter<4; giter++) + { + int ringCap = GET_RING_CAPACITY; + + // 1. fill ring + if( ldsGEnd < m_n ) + { + while( ringCap > WG_SIZE ) + { + if( ldsGEnd >= m_n ) break; + if( lIdx < ringCap - WG_SIZE ) + { + int srcIdx; + AtomInc1( ldsGEnd, srcIdx ); + if( srcIdx < m_n ) + { + int dstIdx; + AtomInc1( ldsRingEnd, dstIdx ); + + int a = gConstraints[m_start+srcIdx].m_bodyA; + int b = gConstraints[m_start+srcIdx].m_bodyB; + ldsRingElem[dstIdx].m_a = (a>b)? b:a; + ldsRingElem[dstIdx].m_b = (a>b)? a:b; + ldsRingElem[dstIdx].m_idx = srcIdx; + } + } + ringCap = GET_RING_CAPACITY; + } + } + + GROUP_LDS_BARRIER; + + // 2. 
fill stack + __local Elem* dst = ldsRingElem; + if( lIdx == 0 ) RING_END = 0; + + int srcIdx=lIdx; + int end = ldsRingEnd; + + { + for(int ii=0; ii> bitIdx)&1) == 0;\n" +"}\n" +"\n" +"typedef struct \n" +"{\n" +" int m_valInt0;\n" +" int m_valInt1;\n" +" int m_valInt2;\n" +" int m_valInt3;\n" +"\n" +" int m_valInt4;\n" +" int m_valInt5;\n" +" int m_valInt6;\n" +" int m_valInt7;\n" +"\n" +" int m_valInt8;\n" +" int m_valInt9;\n" +" int m_valInt10;\n" +" int m_valInt11;\n" +" \n" +" int m_valInt12;\n" +" int m_valInt13;\n" +" int m_valInt14;\n" +" int m_valInt15;\n" +"\n" +"\n" +" float m_fval0;\n" +" float m_fval1;\n" +" float m_fval2;\n" +" float m_fval3;\n" +"} SolverDebugInfo;\n" +"\n" +"// batching on the GPU\n" +"__kernel void CreateBatches( __global Contact4* gConstraints, __global Contact4* gConstraintsOut, //__global u32* gRes, \n" +" __global u32* gN, __global u32* gStart, \n" +"// __global SolverDebugInfo* debugInfo, \n" +" ConstBuffer cb )\n" +"{\n" +" __local u32 ldsStackIdx[STACK_SIZE];\n" +" __local u32 ldsStackEnd;\n" +" __local Elem ldsRingElem[RING_SIZE];\n" +" __local u32 ldsRingEnd;\n" +" __local u32 ldsTmp;\n" +" __local u32 ldsCheckBuffer[CHECK_SIZE];\n" +" __local u32 ldsFixedBuffer[CHECK_SIZE];\n" +" __local u32 ldsGEnd;\n" +" __local u32 ldsDstEnd;\n" +"\n" +" int wgIdx = GET_GROUP_IDX;\n" +" int lIdx = GET_LOCAL_IDX;\n" +" \n" +" const int m_n = gN[wgIdx];\n" +" const int m_start = gStart[wgIdx];\n" +" const int m_staticIdx = cb.m_staticIdx;\n" +" \n" +" if( lIdx == 0 )\n" +" {\n" +" ldsRingEnd = 0;\n" +" ldsGEnd = 0;\n" +" ldsStackEnd = 0;\n" +" ldsDstEnd = m_start;\n" +" }\n" +" \n" +"// while(1)\n" +" for(int ie=0; ie<250; ie++)\n" +" {\n" +" ldsFixedBuffer[lIdx] = 0;\n" +"\n" +" for(int giter=0; giter<4; giter++)\n" +" {\n" +" int ringCap = GET_RING_CAPACITY;\n" +" \n" +" // 1. 
fill ring\n" +" if( ldsGEnd < m_n )\n" +" {\n" +" while( ringCap > WG_SIZE )\n" +" {\n" +" if( ldsGEnd >= m_n ) break;\n" +" if( lIdx < ringCap - WG_SIZE )\n" +" {\n" +" int srcIdx;\n" +" AtomInc1( ldsGEnd, srcIdx );\n" +" if( srcIdx < m_n )\n" +" {\n" +" int dstIdx;\n" +" AtomInc1( ldsRingEnd, dstIdx );\n" +" \n" +" int a = gConstraints[m_start+srcIdx].m_bodyA;\n" +" int b = gConstraints[m_start+srcIdx].m_bodyB;\n" +" ldsRingElem[dstIdx].m_a = (a>b)? b:a;\n" +" ldsRingElem[dstIdx].m_b = (a>b)? a:b;\n" +" ldsRingElem[dstIdx].m_idx = srcIdx;\n" +" }\n" +" }\n" +" ringCap = GET_RING_CAPACITY;\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" // 2. fill stack\n" +" __local Elem* dst = ldsRingElem;\n" +" if( lIdx == 0 ) RING_END = 0;\n" +"\n" +" int srcIdx=lIdx;\n" +" int end = ldsRingEnd;\n" +"\n" +" {\n" +" for(int ii=0; iiChNarrowphaseKernels.h + + +@echo Warning: +@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content +pause diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsAll.bat b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsAll.bat new file mode 100644 index 000000000..9854d9352 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsAll.bat @@ -0,0 +1,10 @@ +stringify.py ChNarrowphaseKernels.cl narrowphaseKernelsCL >ChNarrowphaseKernels.h +stringify.py SolverKernels.cl solverKernelsCL >SolverKernels.h +stringify.py batchingKernels.cl batchingKernelsCL >batchingKernels.h + + + + +@echo Warning: +@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content +pause diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsBatching.bat b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsBatching.bat new file mode 100644 index 000000000..1282f7e28 --- /dev/null +++ 
b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsBatching.bat @@ -0,0 +1,8 @@ +stringify.py batchingKernels.cl batchingKernelsCL >batchingKernels.h + + + + +@echo Warning: +@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content +pause diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsNarrowphase.bat b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsNarrowphase.bat new file mode 100644 index 000000000..20a0d3ea8 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsNarrowphase.bat @@ -0,0 +1,8 @@ +stringify.py ChNarrowphaseKernels.cl narrowphaseKernelsCL >ChNarrowphaseKernels.h + + + + +@echo Warning: +@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content +pause diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsSolver.bat b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsSolver.bat new file mode 100644 index 000000000..ff483deb9 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/stringifykernelsSolver.bat @@ -0,0 +1,8 @@ +stringify.py SolverKernels.cl solverKernelsCL >SolverKernels.h + + + + +@echo Warning: +@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content +pause diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/main.cpp b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/main.cpp new file mode 100644 index 000000000..26846f90c --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/main.cpp @@ -0,0 +1,77 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#include "BasicDemo.h" +#include "GlutStuff.h" +#include "btBulletDynamicsCommon.h" +#include "LinearMath/btHashMap.h" + +#ifdef CL_PLATFORM_AMD +#include "../../opencl/basic_initialize/btOpenCLUtils.h" +extern cl_context g_cxMainContext; +extern cl_command_queue g_cqCommandQue; +extern cl_device_id g_clDevice; +#endif + + + +int main(int argc,char** argv) +{ + + #ifdef CL_PLATFORM_AMD + int ciErrNum = 0; + const char* vendorSDK = btOpenCLUtils::getSdkVendorName(); + printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK); + + cl_device_type deviceType = CL_DEVICE_TYPE_GPU;//CPU;//GPU; + + + void* glCtx=0; + void* glDC = 0; + g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext); + + if (numDev>0) + { + int deviceIndex =0; + g_clDevice = btOpenCLUtils::getDevice(g_cxMainContext,deviceIndex); + btOpenCLDeviceInfo clInfo; + btOpenCLUtils::getDeviceInfo(g_clDevice,clInfo); + btOpenCLUtils::printDeviceInfo(g_clDevice); + // create a command-queue + g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_clDevice, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } +#endif //#ifdef CL_PLATFORM_AMD + + + BasicDemo ccdDemo; + 
ccdDemo.initPhysics(); + + +#ifdef CHECK_MEMORY_LEAKS + ccdDemo.exitPhysics(); +#else + glutmain(argc, argv,1024,600,"Bullet Physics Demo. http://bulletphysics.org",&ccdDemo); +#endif + + //setupGUI(1024,768); + glutMainLoop(); + //default glut doesn't return from mainloop + return 0; +} + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/premake4.lua b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/premake4.lua new file mode 100644 index 000000000..c779ff987 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/basic_demo/premake4.lua @@ -0,0 +1,34 @@ + +-- include "AMD" + +if os.is("Windows") then + + project "basic_bullet2_demo" + + language "C++" + + kind "ConsoleApp" + targetdir "../../bin" + + includedirs { + ".", + "../../bullet2", + "../testbed", + "../../rendering/Gwen", + } + + + links { "testbed", + "bullet2", + "gwen" + } + + initOpenGL() + initGlut() + + files { + "**.cpp", + "**.h" + } + +end diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/DebugCastResult.h b/Extras/RigidBodyGpuPipeline/dynamics/testbed/DebugCastResult.h new file mode 100644 index 000000000..ef3befe44 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/DebugCastResult.h @@ -0,0 +1,88 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef DEBUG_CAST_RESULT_H +#define DEBUG_CAST_RESULT_H + +#include "BulletCollision/NarrowPhaseCollision/btConvexCast.h" +#include "LinearMath/btTransform.h" +#include "GL_ShapeDrawer.h" +#include "GlutStuff.h" +#ifdef WIN32 +#include +#endif +//think different +#if defined(__APPLE__) && !defined (VMDMESA) +#include +#include +#else +#include +#endif +struct btDebugCastResult : public btConvexCast::CastResult +{ + + btTransform m_fromTrans; + const btPolyhedralConvexShape* m_shape; + btVector3 m_linVel; + btVector3 m_angVel; + GL_ShapeDrawer* m_shapeDrawer; + + btDebugCastResult(const btTransform& fromTrans,const btPolyhedralConvexShape* shape, + const btVector3& linVel,const btVector3& angVel,GL_ShapeDrawer* drawer) + :m_fromTrans(fromTrans), + m_shape(shape), + m_linVel(linVel), + m_angVel(angVel), + m_shapeDrawer(drawer) + { + } + + virtual void drawCoordSystem(const btTransform& tr) + { + btScalar m[16]; + tr.getOpenGLMatrix(m); + glPushMatrix(); + btglLoadMatrix(m); + glBegin(GL_LINES); + btglColor3(1, 0, 0); + btglVertex3(0, 0, 0); + btglVertex3(1, 0, 0); + btglColor3(0, 1, 0); + btglVertex3(0, 0, 0); + btglVertex3(0, 1, 0); + btglColor3(0, 0, 1); + btglVertex3(0, 0, 0); + btglVertex3(0, 0, 1); + glEnd(); + glPopMatrix(); + } + + virtual void DebugDraw(btScalar fraction) + { + btVector3 worldBoundsMin(-1000,-1000,-1000); + btVector3 worldBoundsMax(1000,1000,1000); + + + btScalar m[16]; + btTransform hitTrans; + btTransformUtil::integrateTransform(m_fromTrans,m_linVel,m_angVel,fraction,hitTrans); + hitTrans.getOpenGLMatrix(m); + if (m_shapeDrawer) + m_shapeDrawer->drawOpenGL(m,m_shape,btVector3(1,0,0),btIDebugDraw::DBG_NoDebug,worldBoundsMin,worldBoundsMax); + } +}; + + +#endif //DEBUG_CAST_RESULT_H diff --git 
a/Extras/RigidBodyGpuPipeline/dynamics/testbed/DemoApplication.cpp b/Extras/RigidBodyGpuPipeline/dynamics/testbed/DemoApplication.cpp new file mode 100644 index 000000000..d582570e6 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/DemoApplication.cpp @@ -0,0 +1,1375 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + + +#include "DemoApplication.h" +#include "LinearMath/btIDebugDraw.h" +#include "BulletDynamics/Dynamics/btDynamicsWorld.h" + +#include "BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h"//picking +#include "BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h"//picking + +#include "BulletCollision/CollisionShapes/btCollisionShape.h" +#include "BulletCollision/CollisionShapes/btBoxShape.h" +#include "BulletCollision/CollisionShapes/btSphereShape.h" +#include "BulletCollision/CollisionShapes/btCompoundShape.h" +#include "BulletCollision/CollisionShapes/btUniformScalingShape.h" +#include "BulletDynamics/ConstraintSolver/btConstraintSolver.h" +#include "GL_ShapeDrawer.h" +#include "LinearMath/btQuickprof.h" +#include "LinearMath/btDefaultMotionState.h" +#include "LinearMath/btSerializer.h" +#include "GLDebugFont.h" + +static bool use6Dof = false; +extern bool gDisableDeactivation; +int numObjects = 0; +const int maxNumObjects = 16384; +btTransform startTransforms[maxNumObjects]; +btCollisionShape* gShapePtr[maxNumObjects];//1 rigidbody has 1 shape (no re-use of shapes) +#define SHOW_NUM_DEEP_PENETRATIONS 1 + +extern int gNumClampedCcdMotions; + +#ifdef SHOW_NUM_DEEP_PENETRATIONS +extern int gNumDeepPenetrationChecks; + +extern int gNumSplitImpulseRecoveries; +extern int gNumGjkChecks; +extern int gNumAlignedAllocs; +extern int gNumAlignedFree; +extern int gTotalBytesAlignedAllocs; + +#endif // + + +DemoApplication::DemoApplication() +//see btIDebugDraw.h for modes +: +m_dynamicsWorld(0), +m_pickConstraint(0), +m_shootBoxShape(0), +m_cameraDistance(15.0), +m_debugMode(0), +m_ele(20.f), +m_azi(0.f), +m_cameraPosition(0.f,0.f,0.f), +m_cameraTargetPosition(0.f,0.f,0.f), +m_mouseOldX(0), +m_mouseOldY(0), +m_mouseButtons(0), +m_modifierKeys(0), +m_scaleBottom(0.5f), +m_scaleFactor(2.f), +m_cameraUp(0,1,0), +m_forwardAxis(2), +m_glutScreenWidth(0), +m_glutScreenHeight(0), +m_frustumZNear(1.f), +m_frustumZFar(10000.f), +m_ortho(0), 
+m_ShootBoxInitialSpeed(40.f), +m_stepping(true), +m_singleStep(false), +m_idle(false), + +m_enableshadows(false), +m_sundirection(btVector3(1,-2,1)*1000), +m_defaultContactProcessingThreshold(BT_LARGE_FLOAT) +{ +#ifndef BT_NO_PROFILE + m_profileIterator = CProfileManager::Get_Iterator(); +#endif //BT_NO_PROFILE + + m_shapeDrawer = new GL_ShapeDrawer (); + m_shapeDrawer->enableTexture(true); + m_enableshadows = false; +} + + + +DemoApplication::~DemoApplication() +{ +#ifndef BT_NO_PROFILE + CProfileManager::Release_Iterator(m_profileIterator); +#endif //BT_NO_PROFILE + + if (m_shootBoxShape) + delete m_shootBoxShape; + + if (m_shapeDrawer) + delete m_shapeDrawer; +} + + +void DemoApplication::overrideGLShapeDrawer (GL_ShapeDrawer* shapeDrawer) +{ + shapeDrawer->enableTexture (m_shapeDrawer->hasTextureEnabled()); + delete m_shapeDrawer; + m_shapeDrawer = shapeDrawer; +} + +void DemoApplication::myinit(void) +{ + + GLfloat light_ambient[] = { btScalar(0.2), btScalar(0.2), btScalar(0.2), btScalar(1.0) }; + GLfloat light_diffuse[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0) }; + GLfloat light_specular[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0 )}; + /* light_position is NOT default value */ + GLfloat light_position0[] = { btScalar(1.0), btScalar(10.0), btScalar(1.0), btScalar(0.0 )}; + GLfloat light_position1[] = { btScalar(-1.0), btScalar(-10.0), btScalar(-1.0), btScalar(0.0) }; + + glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT0, GL_POSITION, light_position0); + + glLightfv(GL_LIGHT1, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT1, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT1, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT1, GL_POSITION, light_position1); + + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + glEnable(GL_LIGHT1); + + + glShadeModel(GL_SMOOTH); + glEnable(GL_DEPTH_TEST); + 
glDepthFunc(GL_LESS); + + glClearColor(btScalar(0.7),btScalar(0.7),btScalar(0.7),btScalar(0)); + + // glEnable(GL_CULL_FACE); + // glCullFace(GL_BACK); +} + + +void DemoApplication::setCameraDistance(float dist) +{ + m_cameraDistance = dist; +} + +float DemoApplication::getCameraDistance() +{ + return m_cameraDistance; +} + + + +void DemoApplication::toggleIdle() { + if (m_idle) { + m_idle = false; + } + else { + m_idle = true; + } +} + + + + +void DemoApplication::updateCamera() { + + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + btScalar rele = m_ele * btScalar(0.01745329251994329547);// rads per deg + btScalar razi = m_azi * btScalar(0.01745329251994329547);// rads per deg + + + btQuaternion rot(m_cameraUp,razi); + + + btVector3 eyePos(0,0,0); + eyePos[m_forwardAxis] = -m_cameraDistance; + + btVector3 forward(eyePos[0],eyePos[1],eyePos[2]); + if (forward.length2() < SIMD_EPSILON) + { + forward.setValue(1.f,0.f,0.f); + } + btVector3 right = m_cameraUp.cross(forward); + btQuaternion roll(right,-rele); + + eyePos = btMatrix3x3(rot) * btMatrix3x3(roll) * eyePos; + + m_cameraPosition[0] = eyePos.getX(); + m_cameraPosition[1] = eyePos.getY(); + m_cameraPosition[2] = eyePos.getZ(); + m_cameraPosition += m_cameraTargetPosition; + + if (m_glutScreenWidth == 0 && m_glutScreenHeight == 0) + return; + + btScalar aspect; + btVector3 extents; + + aspect = m_glutScreenWidth / (btScalar)m_glutScreenHeight; + extents.setValue(aspect * 1.0f, 1.0f,0); + + + if (m_ortho) + { + // reset matrix + glLoadIdentity(); + + + extents *= m_cameraDistance; + btVector3 lower = m_cameraTargetPosition - extents; + btVector3 upper = m_cameraTargetPosition + extents; + //gluOrtho2D(lower.x, upper.x, lower.y, upper.y); + glOrtho(lower.getX(), upper.getX(), lower.getY(), upper.getY(),-1000,1000); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + //glTranslatef(100,210,0); + } else + { +// glFrustum (-aspect, aspect, -1.0, 1.0, 1.0, 10000.0); + glFrustum (-aspect * m_frustumZNear, aspect 
* m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + gluLookAt(m_cameraPosition[0], m_cameraPosition[1], m_cameraPosition[2], + m_cameraTargetPosition[0], m_cameraTargetPosition[1], m_cameraTargetPosition[2], + m_cameraUp.getX(),m_cameraUp.getY(),m_cameraUp.getZ()); + } + +} + + + +const float STEPSIZE = 5; + +void DemoApplication::stepLeft() +{ + m_azi -= STEPSIZE; if (m_azi < 0) m_azi += 360; updateCamera(); +} +void DemoApplication::stepRight() +{ + m_azi += STEPSIZE; if (m_azi >= 360) m_azi -= 360; updateCamera(); +} +void DemoApplication::stepFront() +{ + m_ele += STEPSIZE; if (m_ele >= 360) m_ele -= 360; updateCamera(); +} +void DemoApplication::stepBack() +{ + m_ele -= STEPSIZE; if (m_ele < 0) m_ele += 360; updateCamera(); +} +void DemoApplication::zoomIn() +{ + m_cameraDistance -= btScalar(0.4); updateCamera(); + if (m_cameraDistance < btScalar(0.1)) + m_cameraDistance = btScalar(0.1); + +} +void DemoApplication::zoomOut() +{ + m_cameraDistance += btScalar(0.4); updateCamera(); + +} + + + + + + + + + + +void DemoApplication::reshape(int w, int h) +{ + GLDebugResetFont(w,h); + + m_glutScreenWidth = w; + m_glutScreenHeight = h; + + glViewport(0, 0, w, h); + updateCamera(); +} + + +void DemoApplication::keyboardCallback(unsigned char key, int x, int y) +{ + (void)x; + (void)y; + + m_lastKey = 0; + +#ifndef BT_NO_PROFILE + if (key >= 0x31 && key <= 0x39) + { + int child = key-0x31; + m_profileIterator->Enter_Child(child); + } + if (key==0x30) + { + m_profileIterator->Enter_Parent(); + } +#endif //BT_NO_PROFILE + + switch (key) + { + case 'q' : +#ifdef BT_USE_FREEGLUT + //return from glutMainLoop(), detect memory leaks etc. 
+ glutLeaveMainLoop(); +#else + exit(0); +#endif + break; + + case 'l' : stepLeft(); break; + case 'r' : stepRight(); break; + case 'f' : stepFront(); break; + case 'b' : stepBack(); break; + case 'z' : zoomIn(); break; + case 'x' : zoomOut(); break; + case 'i' : toggleIdle(); break; + case 'g' : m_enableshadows=!m_enableshadows;break; + case 'u' : m_shapeDrawer->enableTexture(!m_shapeDrawer->enableTexture(false));break; + case 'h': + if (m_debugMode & btIDebugDraw::DBG_NoHelpText) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_NoHelpText); + else + m_debugMode |= btIDebugDraw::DBG_NoHelpText; + break; + + case 'w': + if (m_debugMode & btIDebugDraw::DBG_DrawWireframe) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_DrawWireframe); + else + m_debugMode |= btIDebugDraw::DBG_DrawWireframe; + break; + + case 'p': + if (m_debugMode & btIDebugDraw::DBG_ProfileTimings) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_ProfileTimings); + else + m_debugMode |= btIDebugDraw::DBG_ProfileTimings; + break; + + case '=': + { + int maxSerializeBufferSize = 1024*1024*5; + btDefaultSerializer* serializer = new btDefaultSerializer(maxSerializeBufferSize); + //serializer->setSerializationFlags(BT_SERIALIZE_NO_DUPLICATE_ASSERT); + m_dynamicsWorld->serialize(serializer); + FILE* f2 = fopen("testFile.bullet","wb"); + fwrite(serializer->getBufferPointer(),serializer->getCurrentBufferSize(),1,f2); + fclose(f2); + delete serializer; + break; + + } + + case 'm': + if (m_debugMode & btIDebugDraw::DBG_EnableSatComparison) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_EnableSatComparison); + else + m_debugMode |= btIDebugDraw::DBG_EnableSatComparison; + break; + + case 'n': + if (m_debugMode & btIDebugDraw::DBG_DisableBulletLCP) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_DisableBulletLCP); + else + m_debugMode |= btIDebugDraw::DBG_DisableBulletLCP; + break; + + case 't' : + if (m_debugMode & btIDebugDraw::DBG_DrawText) + m_debugMode = m_debugMode & 
(~btIDebugDraw::DBG_DrawText); + else + m_debugMode |= btIDebugDraw::DBG_DrawText; + break; + case 'y': + if (m_debugMode & btIDebugDraw::DBG_DrawFeaturesText) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_DrawFeaturesText); + else + m_debugMode |= btIDebugDraw::DBG_DrawFeaturesText; + break; + case 'a': + if (m_debugMode & btIDebugDraw::DBG_DrawAabb) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_DrawAabb); + else + m_debugMode |= btIDebugDraw::DBG_DrawAabb; + break; + case 'c' : + if (m_debugMode & btIDebugDraw::DBG_DrawContactPoints) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_DrawContactPoints); + else + m_debugMode |= btIDebugDraw::DBG_DrawContactPoints; + break; + case 'C' : + if (m_debugMode & btIDebugDraw::DBG_DrawConstraints) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_DrawConstraints); + else + m_debugMode |= btIDebugDraw::DBG_DrawConstraints; + break; + case 'L' : + if (m_debugMode & btIDebugDraw::DBG_DrawConstraintLimits) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_DrawConstraintLimits); + else + m_debugMode |= btIDebugDraw::DBG_DrawConstraintLimits; + break; + + case 'd' : + if (m_debugMode & btIDebugDraw::DBG_NoDeactivation) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_NoDeactivation); + else + m_debugMode |= btIDebugDraw::DBG_NoDeactivation; + if (m_debugMode & btIDebugDraw::DBG_NoDeactivation) + { + gDisableDeactivation = true; + } else + { + gDisableDeactivation = false; + } + break; + + + + + case 'o' : + { + m_ortho = !m_ortho;//m_stepping = !m_stepping; + break; + } + case 's' : clientMoveAndDisplay(); break; + // case ' ' : newRandom(); break; + case ' ': + clientResetScene(); + break; + case '1': + { + if (m_debugMode & btIDebugDraw::DBG_EnableCCD) + m_debugMode = m_debugMode & (~btIDebugDraw::DBG_EnableCCD); + else + m_debugMode |= btIDebugDraw::DBG_EnableCCD; + break; + } + + case '.': + { + shootBox(getRayTo(x,y));//getCameraTargetPosition()); + break; + } + + case '+': + { + m_ShootBoxInitialSpeed += 10.f; + 
break; + } + case '-': + { + m_ShootBoxInitialSpeed -= 10.f; + break; + } + + default: + // std::cout << "unused key : " << key << std::endl; + break; + } + + if (getDynamicsWorld() && getDynamicsWorld()->getDebugDrawer()) + getDynamicsWorld()->getDebugDrawer()->setDebugMode(m_debugMode); + + + +} + +void DemoApplication::setDebugMode(int mode) +{ + m_debugMode = mode; + if (getDynamicsWorld() && getDynamicsWorld()->getDebugDrawer()) + getDynamicsWorld()->getDebugDrawer()->setDebugMode(mode); +} + + + + + + +void DemoApplication::moveAndDisplay() +{ + if (!m_idle) + clientMoveAndDisplay(); + else + displayCallback(); +} + + + + +void DemoApplication::displayCallback() +{ +} + +#define NUM_SPHERES_ON_DIAGONAL 9 + +void DemoApplication::setShootBoxShape () +{ + if (!m_shootBoxShape) + { + btBoxShape* box = new btBoxShape(btVector3(.5f,.5f,.5f)); + box->initializePolyhedralFeatures(); + m_shootBoxShape = box; + } +} + +void DemoApplication::shootBox(const btVector3& destination) +{ + + if (m_dynamicsWorld) + { + float mass = 1.f; + btTransform startTransform; + startTransform.setIdentity(); + btVector3 camPos = getCameraPosition(); + startTransform.setOrigin(camPos); + + setShootBoxShape (); + + btRigidBody* body = this->localCreateRigidBody(mass, startTransform,m_shootBoxShape); + body->setLinearFactor(btVector3(1,1,1)); + //body->setRestitution(1); + + btVector3 linVel(destination[0]-camPos[0],destination[1]-camPos[1],destination[2]-camPos[2]); + linVel.normalize(); + linVel*=m_ShootBoxInitialSpeed; + + body->getWorldTransform().setOrigin(camPos); + body->getWorldTransform().setRotation(btQuaternion(0,0,0,1)); + body->setLinearVelocity(linVel); + body->setAngularVelocity(btVector3(0,0,0)); + body->setCcdMotionThreshold(0.5); + body->setCcdSweptSphereRadius(0.9f); +// printf("shootBox uid=%d\n", body->getBroadphaseHandle()->getUid()); +// printf("camPos=%f,%f,%f\n",camPos.getX(),camPos.getY(),camPos.getZ()); +// 
printf("destination=%f,%f,%f\n",destination.getX(),destination.getY(),destination.getZ()); + + } +} + + +int gPickingConstraintId = 0; +btVector3 gOldPickingPos; +btVector3 gHitPos(-1,-1,-1); +float gOldPickingDist = 0.f; +btRigidBody* pickedBody = 0;//for deactivation state + + +btVector3 DemoApplication::getRayTo(int x,int y) +{ + + + + if (m_ortho) + { + + btScalar aspect; + btVector3 extents; + aspect = m_glutScreenWidth / (btScalar)m_glutScreenHeight; + extents.setValue(aspect * 1.0f, 1.0f,0); + + extents *= m_cameraDistance; + btVector3 lower = m_cameraTargetPosition - extents; + btVector3 upper = m_cameraTargetPosition + extents; + + btScalar u = x / btScalar(m_glutScreenWidth); + btScalar v = (m_glutScreenHeight - y) / btScalar(m_glutScreenHeight); + + btVector3 p(0,0,0); + p.setValue((1.0f - u) * lower.getX() + u * upper.getX(),(1.0f - v) * lower.getY() + v * upper.getY(),m_cameraTargetPosition.getZ()); + return p; + } + + float top = 1.f; + float bottom = -1.f; + float nearPlane = 1.f; + float tanFov = (top-bottom)*0.5f / nearPlane; + float fov = btScalar(2.0) * btAtan(tanFov); + + btVector3 rayFrom = getCameraPosition(); + btVector3 rayForward = (getCameraTargetPosition()-getCameraPosition()); + rayForward.normalize(); + float farPlane = 10000.f; + rayForward*= farPlane; + + btVector3 rightOffset; + btVector3 vertical = m_cameraUp; + + btVector3 hor; + hor = rayForward.cross(vertical); + hor.normalize(); + vertical = hor.cross(rayForward); + vertical.normalize(); + + float tanfov = tanf(0.5f*fov); + + + hor *= 2.f * farPlane * tanfov; + vertical *= 2.f * farPlane * tanfov; + + btScalar aspect; + + aspect = m_glutScreenWidth / (btScalar)m_glutScreenHeight; + + hor*=aspect; + + + btVector3 rayToCenter = rayFrom + rayForward; + btVector3 dHor = hor * 1.f/float(m_glutScreenWidth); + btVector3 dVert = vertical * 1.f/float(m_glutScreenHeight); + + + btVector3 rayTo = rayToCenter - 0.5f * hor + 0.5f * vertical; + rayTo += btScalar(x) * dHor; + rayTo -= 
btScalar(y) * dVert; + return rayTo; +} + +btScalar mousePickClamping = 30.f; + + +void DemoApplication::mouseFunc(int button, int state, int x, int y) +{ + if (state == 0) + { + m_mouseButtons |= 1<rayTest(m_cameraPosition,rayTo,rayCallback); + if (rayCallback.hasHit()) + { + + btRigidBody* body = btRigidBody::upcast(rayCallback.m_collisionObject); + if (body) + { + body->setActivationState(ACTIVE_TAG); + btVector3 impulse = rayTo; + impulse.normalize(); + float impulseStrength = 10.f; + impulse *= impulseStrength; + btVector3 relPos = rayCallback.m_hitPointWorld - body->getCenterOfMassPosition(); + body->applyImpulse(impulse,relPos); + } + } + } +#endif + + + + } else + { + + } + break; + } + case 0: + { + if (state==0) + { + + + //add a point to point constraint for picking + if (m_dynamicsWorld) + { + + btVector3 rayFrom; + if (m_ortho) + { + rayFrom = rayTo; + rayFrom.setZ(-100.f); + } else + { + rayFrom = m_cameraPosition; + } + + btCollisionWorld::ClosestRayResultCallback rayCallback(rayFrom,rayTo); + m_dynamicsWorld->rayTest(rayFrom,rayTo,rayCallback); + if (rayCallback.hasHit()) + { + + + btRigidBody* body = btRigidBody::upcast(rayCallback.m_collisionObject); + if (body) + { + //other exclusions? 
+ if (!(body->isStaticObject() || body->isKinematicObject())) + { + pickedBody = body; + pickedBody->setActivationState(DISABLE_DEACTIVATION); + + + btVector3 pickPos = rayCallback.m_hitPointWorld; + //printf("pickPos=%f,%f,%f\n",pickPos.getX(),pickPos.getY(),pickPos.getZ()); + + + btVector3 localPivot = body->getCenterOfMassTransform().inverse() * pickPos; + + + + + + + if (use6Dof) + { + btTransform tr; + tr.setIdentity(); + tr.setOrigin(localPivot); + btGeneric6DofConstraint* dof6 = new btGeneric6DofConstraint(*body, tr,false); + dof6->setLinearLowerLimit(btVector3(0,0,0)); + dof6->setLinearUpperLimit(btVector3(0,0,0)); + dof6->setAngularLowerLimit(btVector3(0,0,0)); + dof6->setAngularUpperLimit(btVector3(0,0,0)); + + m_dynamicsWorld->addConstraint(dof6); + m_pickConstraint = dof6; + + dof6->setParam(BT_CONSTRAINT_STOP_CFM,0.8,0); + dof6->setParam(BT_CONSTRAINT_STOP_CFM,0.8,1); + dof6->setParam(BT_CONSTRAINT_STOP_CFM,0.8,2); + dof6->setParam(BT_CONSTRAINT_STOP_CFM,0.8,3); + dof6->setParam(BT_CONSTRAINT_STOP_CFM,0.8,4); + dof6->setParam(BT_CONSTRAINT_STOP_CFM,0.8,5); + + dof6->setParam(BT_CONSTRAINT_STOP_ERP,0.1,0); + dof6->setParam(BT_CONSTRAINT_STOP_ERP,0.1,1); + dof6->setParam(BT_CONSTRAINT_STOP_ERP,0.1,2); + dof6->setParam(BT_CONSTRAINT_STOP_ERP,0.1,3); + dof6->setParam(BT_CONSTRAINT_STOP_ERP,0.1,4); + dof6->setParam(BT_CONSTRAINT_STOP_ERP,0.1,5); + } else + { + btPoint2PointConstraint* p2p = new btPoint2PointConstraint(*body,localPivot); + m_dynamicsWorld->addConstraint(p2p); + m_pickConstraint = p2p; + p2p->m_setting.m_impulseClamp = mousePickClamping; + //very weak constraint for picking + p2p->m_setting.m_tau = 0.001f; +/* + p2p->setParam(BT_CONSTRAINT_CFM,0.8,0); + p2p->setParam(BT_CONSTRAINT_CFM,0.8,1); + p2p->setParam(BT_CONSTRAINT_CFM,0.8,2); + p2p->setParam(BT_CONSTRAINT_ERP,0.1,0); + p2p->setParam(BT_CONSTRAINT_ERP,0.1,1); + p2p->setParam(BT_CONSTRAINT_ERP,0.1,2); + */ + + + } + use6Dof = !use6Dof; + + //save mouse position for dragging + 
gOldPickingPos = rayTo; + gHitPos = pickPos; + + gOldPickingDist = (pickPos-rayFrom).length(); + } + } + } + } + + } else + { + removePickingConstraint(); + } + + break; + + } + default: + { + } + } + +} + +void DemoApplication::removePickingConstraint() +{ + if (m_pickConstraint && m_dynamicsWorld) + { + m_dynamicsWorld->removeConstraint(m_pickConstraint); + delete m_pickConstraint; + //printf("removed constraint %i",gPickingConstraintId); + m_pickConstraint = 0; + pickedBody->forceActivationState(ACTIVE_TAG); + pickedBody->setDeactivationTime( 0.f ); + pickedBody = 0; + } +} + +void DemoApplication::mouseMotionFunc(int x,int y) +{ + + if (m_pickConstraint) + { + //move the constraint pivot + + if (m_pickConstraint->getConstraintType() == D6_CONSTRAINT_TYPE) + { + btGeneric6DofConstraint* pickCon = static_cast(m_pickConstraint); + if (pickCon) + { + //keep it at the same picking distance + + btVector3 newRayTo = getRayTo(x,y); + btVector3 rayFrom; + btVector3 oldPivotInB = pickCon->getFrameOffsetA().getOrigin(); + + btVector3 newPivotB; + if (m_ortho) + { + newPivotB = oldPivotInB; + newPivotB.setX(newRayTo.getX()); + newPivotB.setY(newRayTo.getY()); + } else + { + rayFrom = m_cameraPosition; + btVector3 dir = newRayTo-rayFrom; + dir.normalize(); + dir *= gOldPickingDist; + + newPivotB = rayFrom + dir; + } + pickCon->getFrameOffsetA().setOrigin(newPivotB); + } + + } else + { + btPoint2PointConstraint* pickCon = static_cast(m_pickConstraint); + if (pickCon) + { + //keep it at the same picking distance + + btVector3 newRayTo = getRayTo(x,y); + btVector3 rayFrom; + btVector3 oldPivotInB = pickCon->getPivotInB(); + btVector3 newPivotB; + if (m_ortho) + { + newPivotB = oldPivotInB; + newPivotB.setX(newRayTo.getX()); + newPivotB.setY(newRayTo.getY()); + } else + { + rayFrom = m_cameraPosition; + btVector3 dir = newRayTo-rayFrom; + dir.normalize(); + dir *= gOldPickingDist; + + newPivotB = rayFrom + dir; + } + pickCon->setPivotB(newPivotB); + } + } + } + + float dx, dy; 
+ dx = btScalar(x) - m_mouseOldX; + dy = btScalar(y) - m_mouseOldY; + + + ///only if ALT key is pressed (Maya style) + if (m_modifierKeys& BT_ACTIVE_ALT) + { + if(m_mouseButtons & 2) + { + btVector3 hor = getRayTo(0,0)-getRayTo(1,0); + btVector3 vert = getRayTo(0,0)-getRayTo(0,1); + btScalar multiplierX = btScalar(0.001); + btScalar multiplierY = btScalar(0.001); + if (m_ortho) + { + multiplierX = 1; + multiplierY = 1; + } + + + m_cameraTargetPosition += hor* dx * multiplierX; + m_cameraTargetPosition += vert* dy * multiplierY; + } + + if(m_mouseButtons & (2 << 2) && m_mouseButtons & 1) + { + } + else if(m_mouseButtons & 1) + { + m_azi += dx * btScalar(0.2); + m_azi = fmodf(m_azi, btScalar(360.f)); + m_ele += dy * btScalar(0.2); + m_ele = fmodf(m_ele, btScalar(180.f)); + } + else if(m_mouseButtons & 4) + { + m_cameraDistance -= dy * btScalar(0.02f); + if (m_cameraDistancegetShapeType() != INVALID_SHAPE_PROXYTYPE)); + + //rigidbody is dynamic if and only if mass is non zero, otherwise static + bool isDynamic = (mass != 0.f); + + btVector3 localInertia(0,0,0); + if (isDynamic) + shape->calculateLocalInertia(mass,localInertia); + + //using motionstate is recommended, it provides interpolation capabilities, and only synchronizes 'active' objects + +#define USE_MOTIONSTATE 1 +#ifdef USE_MOTIONSTATE + btDefaultMotionState* myMotionState = new btDefaultMotionState(startTransform); + + btRigidBody::btRigidBodyConstructionInfo cInfo(mass,myMotionState,shape,localInertia); + + btRigidBody* body = new btRigidBody(cInfo); + body->setContactProcessingThreshold(m_defaultContactProcessingThreshold); + +#else + btRigidBody* body = new btRigidBody(mass,0,shape,localInertia); + body->setWorldTransform(startTransform); +#endif// + + m_dynamicsWorld->addRigidBody(body); + + return body; +} + +//See http://www.lighthouse3d.com/opengl/glut/index.php?bmpfontortho +void DemoApplication::setOrthographicProjection() +{ + + // switch to projection mode + glMatrixMode(GL_PROJECTION); + + // 
save previous matrix which contains the + //settings for the perspective projection + glPushMatrix(); + // reset matrix + glLoadIdentity(); + // set a 2D orthographic projection + gluOrtho2D(0, m_glutScreenWidth, 0, m_glutScreenHeight); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + // invert the y axis, down is positive + glScalef(1, -1, 1); + // mover the origin from the bottom left corner + // to the upper left corner + glTranslatef(btScalar(0), btScalar(-m_glutScreenHeight), btScalar(0)); + +} + +void DemoApplication::resetPerspectiveProjection() +{ + + glMatrixMode(GL_PROJECTION); + glPopMatrix(); + glMatrixMode(GL_MODELVIEW); + updateCamera(); +} + + + + +extern CProfileIterator * m_profileIterator; + +void DemoApplication::displayProfileString(int xOffset,int yStart,char* message) +{ + glRasterPos3f(btScalar(xOffset),btScalar(yStart),btScalar(0)); + GLDebugDrawString(xOffset,yStart,message); +} + + +float DemoApplication::showProfileInfo(int& xOffset,int& yStart, int yIncr) +{ +#ifndef BT_NO_PROFILE + + static double time_since_reset = 0.f; + if (!m_idle) + { + time_since_reset = CProfileManager::Get_Time_Since_Reset(); + } + + + { + //recompute profiling data, and store profile strings + + char blockTime[128]; + + double totalTime = 0; + + int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset(); + + m_profileIterator->First(); + + double parent_time = m_profileIterator->Is_Root() ? time_since_reset : m_profileIterator->Get_Current_Parent_Total_Time(); + + { + sprintf(blockTime,"--- Profiling: %s (total running time: %.3f ms) ---", m_profileIterator->Get_Current_Parent_Name(), parent_time ); + displayProfileString(xOffset,yStart,blockTime); + yStart += yIncr; + sprintf(blockTime,"press (1,2...) 
to display child timings, or 0 for parent" ); + displayProfileString(xOffset,yStart,blockTime); + yStart += yIncr; + + } + + + double accumulated_time = 0.f; + + for (int i = 0; !m_profileIterator->Is_Done(); m_profileIterator->Next()) + { + double current_total_time = m_profileIterator->Get_Current_Total_Time(); + accumulated_time += current_total_time; + double fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f; + + sprintf(blockTime,"%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)", + ++i, m_profileIterator->Get_Current_Name(), fraction, + (current_total_time / (double)frames_since_reset),m_profileIterator->Get_Current_Total_Calls()); + displayProfileString(xOffset,yStart,blockTime); + yStart += yIncr; + totalTime += current_total_time; + } + + sprintf(blockTime,"%s (%.3f %%) :: %.3f ms", "Unaccounted", + // (min(0, time_since_reset - totalTime) / time_since_reset) * 100); + parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time); + + displayProfileString(xOffset,yStart,blockTime); + yStart += yIncr; + + + + sprintf(blockTime,"-------------------------------------------------"); + displayProfileString(xOffset,yStart,blockTime); + yStart += yIncr; + + } +#endif//BT_NO_PROFILE + + return time_since_reset; + + +} + + +// +void DemoApplication::renderscene(int pass) +{ + btScalar m[16]; + btMatrix3x3 rot;rot.setIdentity(); + const int numObjects=m_dynamicsWorld->getNumCollisionObjects(); + btVector3 wireColor(1,0,0); + for(int i=0;igetCollisionObjectArray()[i]; + btRigidBody* body=btRigidBody::upcast(colObj); + if(body&&body->getMotionState()) + { + btDefaultMotionState* myMotionState = (btDefaultMotionState*)body->getMotionState(); + myMotionState->m_graphicsWorldTrans.getOpenGLMatrix(m); + rot=myMotionState->m_graphicsWorldTrans.getBasis(); + } + else + { + colObj->getWorldTransform().getOpenGLMatrix(m); + rot=colObj->getWorldTransform().getBasis(); + } + 
btVector3 wireColor(1.f,1.0f,0.5f); //wants deactivation + if(i&1) wireColor=btVector3(0.f,0.0f,1.f); + ///color differently for active, sleeping, wantsdeactivation states + if (colObj->getActivationState() == 1) //active + { + if (i & 1) + { + wireColor += btVector3 (1.f,0.f,0.f); + } + else + { + wireColor += btVector3 (.5f,0.f,0.f); + } + } + if(colObj->getActivationState()==2) //ISLAND_SLEEPING + { + if(i&1) + { + wireColor += btVector3 (0.f,1.f, 0.f); + } + else + { + wireColor += btVector3 (0.f,0.5f,0.f); + } + } + + btVector3 aabbMin,aabbMax; + m_dynamicsWorld->getBroadphase()->getBroadphaseAabb(aabbMin,aabbMax); + + aabbMin-=btVector3(BT_LARGE_FLOAT,BT_LARGE_FLOAT,BT_LARGE_FLOAT); + aabbMax+=btVector3(BT_LARGE_FLOAT,BT_LARGE_FLOAT,BT_LARGE_FLOAT); +// printf("aabbMin=(%f,%f,%f)\n",aabbMin.getX(),aabbMin.getY(),aabbMin.getZ()); +// printf("aabbMax=(%f,%f,%f)\n",aabbMax.getX(),aabbMax.getY(),aabbMax.getZ()); +// m_dynamicsWorld->getDebugDrawer()->drawAabb(aabbMin,aabbMax,btVector3(1,1,1)); + + + if (!(getDebugMode()& btIDebugDraw::DBG_DrawWireframe)) + { + switch(pass) + { + case 0: m_shapeDrawer->drawOpenGL(m,colObj->getCollisionShape(),wireColor,getDebugMode(),aabbMin,aabbMax);break; + case 1: m_shapeDrawer->drawShadow(m,m_sundirection*rot,colObj->getCollisionShape(),aabbMin,aabbMax);break; + case 2: m_shapeDrawer->drawOpenGL(m,colObj->getCollisionShape(),wireColor*btScalar(0.3),0,aabbMin,aabbMax);break; + } + } + } +} + +// +void DemoApplication::renderme() +{ + myinit(); + + updateCamera(); + + if (m_dynamicsWorld) + { + if(m_enableshadows) + { + glClear(GL_STENCIL_BUFFER_BIT); + glEnable(GL_CULL_FACE); + renderscene(0); + + glDisable(GL_LIGHTING); + glDepthMask(GL_FALSE); + glDepthFunc(GL_LEQUAL); + glEnable(GL_STENCIL_TEST); + glColorMask(GL_FALSE,GL_FALSE,GL_FALSE,GL_FALSE); + glStencilFunc(GL_ALWAYS,1,0xFFFFFFFFL); + glFrontFace(GL_CCW); + glStencilOp(GL_KEEP,GL_KEEP,GL_INCR); + renderscene(1); + glFrontFace(GL_CW); + 
glStencilOp(GL_KEEP,GL_KEEP,GL_DECR); + renderscene(1); + glFrontFace(GL_CCW); + + glPolygonMode(GL_FRONT,GL_FILL); + glPolygonMode(GL_BACK,GL_FILL); + glShadeModel(GL_SMOOTH); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + glEnable(GL_LIGHTING); + glDepthMask(GL_TRUE); + glCullFace(GL_BACK); + glFrontFace(GL_CCW); + glEnable(GL_CULL_FACE); + glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE); + + glDepthFunc(GL_LEQUAL); + glStencilFunc( GL_NOTEQUAL, 0, 0xFFFFFFFFL ); + glStencilOp( GL_KEEP, GL_KEEP, GL_KEEP ); + glDisable(GL_LIGHTING); + renderscene(2); + glEnable(GL_LIGHTING); + glDepthFunc(GL_LESS); + glDisable(GL_STENCIL_TEST); + glDisable(GL_CULL_FACE); + } + else + { + glDisable(GL_CULL_FACE); + renderscene(0); + } + + int xOffset = 10; + int yStart = 20; + int yIncr = 20; + + + glDisable(GL_LIGHTING); + glColor3f(0, 0, 0); + + if ((m_debugMode & btIDebugDraw::DBG_NoHelpText)==0) + { + setOrthographicProjection(); + + showProfileInfo(xOffset,yStart,yIncr); + +#ifdef USE_QUICKPROF + + + if ( getDebugMode() & btIDebugDraw::DBG_ProfileTimings) + { + static int counter = 0; + counter++; + std::map::iterator iter; + for (iter = btProfiler::mProfileBlocks.begin(); iter != btProfiler::mProfileBlocks.end(); ++iter) + { + char blockTime[128]; + sprintf(blockTime, "%s: %lf",&((*iter).first[0]),btProfiler::getBlockTime((*iter).first, btProfiler::BLOCK_CYCLE_SECONDS));//BLOCK_TOTAL_PERCENT)); + glRasterPos3f(xOffset,yStart,0); + GLDebugDrawString(BMF_GetFont(BMF_kHelvetica10),blockTime); + yStart += yIncr; + + } + + } +#endif //USE_QUICKPROF + + + + + resetPerspectiveProjection(); + } + + glDisable(GL_LIGHTING); + + + } + + updateCamera(); + +} + +#include "BulletCollision/BroadphaseCollision/btAxisSweep3.h" + + +void DemoApplication::clientResetScene() +{ + removePickingConstraint(); + +#ifdef SHOW_NUM_DEEP_PENETRATIONS + gNumDeepPenetrationChecks = 0; + gNumGjkChecks = 0; +#endif //SHOW_NUM_DEEP_PENETRATIONS + + gNumClampedCcdMotions = 0; + int numObjects = 0; + int i; 
+ + if (m_dynamicsWorld) + { + int numConstraints = m_dynamicsWorld->getNumConstraints(); + for (i=0;igetConstraint(0)->setEnabled(true); + } + numObjects = m_dynamicsWorld->getNumCollisionObjects(); + + ///create a copy of the array, not a reference! + btCollisionObjectArray copyArray = m_dynamicsWorld->getCollisionObjectArray(); + + + + + for (i=0;igetMotionState()) + { + btDefaultMotionState* myMotionState = (btDefaultMotionState*)body->getMotionState(); + myMotionState->m_graphicsWorldTrans = myMotionState->m_startWorldTrans; + body->setCenterOfMassTransform( myMotionState->m_graphicsWorldTrans ); + colObj->setInterpolationWorldTransform( myMotionState->m_startWorldTrans ); + colObj->forceActivationState(ACTIVE_TAG); + colObj->activate(); + colObj->setDeactivationTime(0); + //colObj->setActivationState(WANTS_DEACTIVATION); + } + //removed cached contact points (this is not necessary if all objects have been removed from the dynamics world) + if (m_dynamicsWorld->getBroadphase()->getOverlappingPairCache()) + m_dynamicsWorld->getBroadphase()->getOverlappingPairCache()->cleanProxyFromPairs(colObj->getBroadphaseHandle(),getDynamicsWorld()->getDispatcher()); + + btRigidBody* body = btRigidBody::upcast(colObj); + if (body && !body->isStaticObject()) + { + btRigidBody::upcast(colObj)->setLinearVelocity(btVector3(0,0,0)); + btRigidBody::upcast(colObj)->setAngularVelocity(btVector3(0,0,0)); + } + } + + } + + ///reset some internal cached data in the broadphase + m_dynamicsWorld->getBroadphase()->resetPool(getDynamicsWorld()->getDispatcher()); + m_dynamicsWorld->getConstraintSolver()->reset(); + + } + +} diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/DemoApplication.h b/Extras/RigidBodyGpuPipeline/dynamics/testbed/DemoApplication.h new file mode 100644 index 000000000..42903e3ec --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/DemoApplication.h @@ -0,0 +1,257 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 
2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef DEMO_APPLICATION_H +#define DEMO_APPLICATION_H + + +#include "GlutStuff.h" +#include "GL_ShapeDrawer.h" + +#include +#include +#include + + +#include "LinearMath/btVector3.h" +#include "LinearMath/btMatrix3x3.h" +#include "LinearMath/btTransform.h" +#include "LinearMath/btQuickprof.h" +#include "LinearMath/btAlignedObjectArray.h" + +class btCollisionShape; +class btDynamicsWorld; +class btRigidBody; +class btTypedConstraint; + + + +class DemoApplication +{ +protected: + void displayProfileString(int xOffset,int yStart,char* message); + class CProfileIterator* m_profileIterator; + + protected: +#ifdef USE_BT_CLOCK + btClock m_clock; +#endif //USE_BT_CLOCK + + ///this is the most important class + btDynamicsWorld* m_dynamicsWorld; + + ///constraint for mouse picking + btTypedConstraint* m_pickConstraint; + + virtual void removePickingConstraint(); + + btCollisionShape* m_shootBoxShape; + + float m_cameraDistance; + int m_debugMode; + + float m_ele; + float m_azi; + btVector3 m_cameraPosition; + btVector3 m_cameraTargetPosition;//look at + + int m_mouseOldX; + int m_mouseOldY; + int 
m_mouseButtons; +public: + int m_modifierKeys; +protected: + + float m_scaleBottom; + float m_scaleFactor; + btVector3 m_cameraUp; + int m_forwardAxis; + + int m_glutScreenWidth; + int m_glutScreenHeight; + + float m_frustumZNear; + float m_frustumZFar; + + int m_ortho; + + float m_ShootBoxInitialSpeed; + + bool m_stepping; + bool m_singleStep; + bool m_idle; + int m_lastKey; + + virtual float showProfileInfo(int& xOffset,int& yStart, int yIncr); + void renderscene(int pass); + + GL_ShapeDrawer* m_shapeDrawer; + bool m_enableshadows; + btVector3 m_sundirection; + btScalar m_defaultContactProcessingThreshold; + +public: + + DemoApplication(); + + virtual ~DemoApplication(); + + btDynamicsWorld* getDynamicsWorld() + { + return m_dynamicsWorld; + } + + virtual void initPhysics() = 0; + + virtual void setDrawClusters(bool drawClusters) + { + + } + + void overrideGLShapeDrawer (GL_ShapeDrawer* shapeDrawer); + + void setOrthographicProjection(); + void resetPerspectiveProjection(); + + bool setTexturing(bool enable) { return(m_shapeDrawer->enableTexture(enable)); } + bool setShadows(bool enable) { bool p=m_enableshadows;m_enableshadows=enable;return(p); } + bool getTexturing() const + { + return m_shapeDrawer->hasTextureEnabled(); + } + bool getShadows() const + { + return m_enableshadows; + } + + + int getDebugMode() + { + return m_debugMode ; + } + + void setDebugMode(int mode); + + void setAzi(float azi) + { + m_azi = azi; + } + + void setCameraUp(const btVector3& camUp) + { + m_cameraUp = camUp; + } + void setCameraForwardAxis(int axis) + { + m_forwardAxis = axis; + } + + virtual void myinit(); + + void toggleIdle(); + + virtual void updateCamera(); + + btVector3 getCameraPosition() + { + return m_cameraPosition; + } + btVector3 getCameraTargetPosition() + { + return m_cameraTargetPosition; + } + + btScalar getDeltaTimeMicroseconds() + { +#ifdef USE_BT_CLOCK + btScalar dt = (btScalar)m_clock.getTimeMicroseconds(); + m_clock.reset(); + return dt; +#else + return 
btScalar(16666.); +#endif + } + void setFrustumZPlanes(float zNear, float zFar) + { + m_frustumZNear = zNear; + m_frustumZFar = zFar; + } + + ///glut callbacks + + float getCameraDistance(); + void setCameraDistance(float dist); + void moveAndDisplay(); + + virtual void clientMoveAndDisplay() = 0; + + virtual void clientResetScene(); + + ///Demo functions + virtual void setShootBoxShape (); + virtual void shootBox(const btVector3& destination); + + + btVector3 getRayTo(int x,int y); + + btRigidBody* localCreateRigidBody(float mass, const btTransform& startTransform,btCollisionShape* shape); + + ///callback methods by glut + + virtual void keyboardCallback(unsigned char key, int x, int y); + + virtual void keyboardUpCallback(unsigned char key, int x, int y) {} + + virtual void specialKeyboard(int key, int x, int y){} + + virtual void specialKeyboardUp(int key, int x, int y){} + + virtual void reshape(int w, int h); + + virtual void mouseFunc(int button, int state, int x, int y); + + virtual void mouseMotionFunc(int x,int y); + + virtual void displayCallback(); + + virtual void renderme(); + + virtual void swapBuffers() = 0; + + virtual void updateModifierKeys() = 0; + + void stepLeft(); + void stepRight(); + void stepFront(); + void stepBack(); + void zoomIn(); + void zoomOut(); + + bool isIdle() const + { + return m_idle; + } + + void setIdle(bool idle) + { + m_idle = idle; + } + + +}; + +#endif //DEMO_APPLICATION_H + + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugDrawer.cpp b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugDrawer.cpp new file mode 100644 index 000000000..79758df6c --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugDrawer.cpp @@ -0,0 +1,139 @@ + +#include "GLDebugDrawer.h" +#include "GLDebugFont.h" +#include "GlutStuff.h" + + + +#include //printf debugging +GLDebugDrawer::GLDebugDrawer() +:m_debugMode(0) +{ + +} + +void GLDebugDrawer::drawLine(const btVector3& from,const btVector3& to,const btVector3& 
fromColor, const btVector3& toColor) +{ + glBegin(GL_LINES); + glColor3f(fromColor.getX(), fromColor.getY(), fromColor.getZ()); + glVertex3d(from.getX(), from.getY(), from.getZ()); + glColor3f(toColor.getX(), toColor.getY(), toColor.getZ()); + glVertex3d(to.getX(), to.getY(), to.getZ()); + glEnd(); +} + +void GLDebugDrawer::drawLine(const btVector3& from,const btVector3& to,const btVector3& color) +{ + drawLine(from,to,color,color); +} + +void GLDebugDrawer::drawSphere (const btVector3& p, btScalar radius, const btVector3& color) +{ + glColor4f (color.getX(), color.getY(), color.getZ(), btScalar(1.0f)); + glPushMatrix (); + glTranslatef (p.getX(), p.getY(), p.getZ()); + + int lats = 5; + int longs = 5; + + int i, j; + for(i = 0; i <= lats; i++) { + btScalar lat0 = SIMD_PI * (-btScalar(0.5) + (btScalar) (i - 1) / lats); + btScalar z0 = radius*sin(lat0); + btScalar zr0 = radius*cos(lat0); + + btScalar lat1 = SIMD_PI * (-btScalar(0.5) + (btScalar) i / lats); + btScalar z1 = radius*sin(lat1); + btScalar zr1 = radius*cos(lat1); + + glBegin(GL_QUAD_STRIP); + for(j = 0; j <= longs; j++) { + btScalar lng = 2 * SIMD_PI * (btScalar) (j - 1) / longs; + btScalar x = cos(lng); + btScalar y = sin(lng); + + glNormal3f(x * zr0, y * zr0, z0); + glVertex3f(x * zr0, y * zr0, z0); + glNormal3f(x * zr1, y * zr1, z1); + glVertex3f(x * zr1, y * zr1, z1); + } + glEnd(); + } + + glPopMatrix(); +} + +void GLDebugDrawer::drawBox (const btVector3& boxMin, const btVector3& boxMax, const btVector3& color, btScalar alpha) +{ + btVector3 halfExtent = (boxMax - boxMin) * btScalar(0.5f); + btVector3 center = (boxMax + boxMin) * btScalar(0.5f); + //glEnable(GL_BLEND); // Turn blending On + //glBlendFunc(GL_SRC_ALPHA, GL_ONE); + glColor4f (color.getX(), color.getY(), color.getZ(), alpha); + glPushMatrix (); + glTranslatef (center.getX(), center.getY(), center.getZ()); + glScaled(2*halfExtent[0], 2*halfExtent[1], 2*halfExtent[2]); +// glutSolidCube(1.0); + glPopMatrix (); + //glDisable(GL_BLEND); +} + 
+void GLDebugDrawer::drawTriangle(const btVector3& a,const btVector3& b,const btVector3& c,const btVector3& color,btScalar alpha) +{ +// if (m_debugMode > 0) + { + const btVector3 n=btCross(b-a,c-a).normalized(); + glBegin(GL_TRIANGLES); + glColor4f(color.getX(), color.getY(), color.getZ(),alpha); + glNormal3d(n.getX(),n.getY(),n.getZ()); + glVertex3d(a.getX(),a.getY(),a.getZ()); + glVertex3d(b.getX(),b.getY(),b.getZ()); + glVertex3d(c.getX(),c.getY(),c.getZ()); + glEnd(); + } +} + +void GLDebugDrawer::setDebugMode(int debugMode) +{ + m_debugMode = debugMode; + +} + +void GLDebugDrawer::draw3dText(const btVector3& location,const char* textString) +{ + glRasterPos3f(location.x(), location.y(), location.z()); + //BMF_DrawString(BMF_GetFont(BMF_kHelvetica10),textString); +} + +void GLDebugDrawer::reportErrorWarning(const char* warningString) +{ + printf("%s\n",warningString); +} + +void GLDebugDrawer::drawContactPoint(const btVector3& pointOnB,const btVector3& normalOnB,btScalar distance,int lifeTime,const btVector3& color) +{ + + { + btVector3 to=pointOnB+normalOnB*1;//distance; + const btVector3&from = pointOnB; + glColor4f(color.getX(), color.getY(), color.getZ(),1.f); + //glColor4f(0,0,0,1.f); + glBegin(GL_LINES); + glVertex3d(from.getX(), from.getY(), from.getZ()); + glVertex3d(to.getX(), to.getY(), to.getZ()); + glEnd(); + + +// glRasterPos3f(from.x(), from.y(), from.z()); +// char buf[12]; +// sprintf(buf," %d",lifeTime); + //BMF_DrawString(BMF_GetFont(BMF_kHelvetica10),buf); + + + } +} + + + + + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugDrawer.h b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugDrawer.h new file mode 100644 index 000000000..2a05405a7 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugDrawer.h @@ -0,0 +1,38 @@ +#ifndef GL_DEBUG_DRAWER_H +#define GL_DEBUG_DRAWER_H + +#include "LinearMath/btIDebugDraw.h" + + + +class GLDebugDrawer : public btIDebugDraw +{ + int m_debugMode; + +public: + + 
GLDebugDrawer(); + + + virtual void drawLine(const btVector3& from,const btVector3& to,const btVector3& fromColor, const btVector3& toColor); + + virtual void drawLine(const btVector3& from,const btVector3& to,const btVector3& color); + + virtual void drawSphere (const btVector3& p, btScalar radius, const btVector3& color); + virtual void drawBox (const btVector3& boxMin, const btVector3& boxMax, const btVector3& color, btScalar alpha); + + virtual void drawTriangle(const btVector3& a,const btVector3& b,const btVector3& c,const btVector3& color,btScalar alpha); + + virtual void drawContactPoint(const btVector3& PointOnB,const btVector3& normalOnB,btScalar distance,int lifeTime,const btVector3& color); + + virtual void reportErrorWarning(const char* warningString); + + virtual void draw3dText(const btVector3& location,const char* textString); + + virtual void setDebugMode(int debugMode); + + virtual int getDebugMode() const { return m_debugMode;} + +}; + +#endif//GL_DEBUG_DRAWER_H diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugFont.cpp b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugFont.cpp new file mode 100644 index 000000000..b62973182 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugFont.cpp @@ -0,0 +1,1000 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "GLDebugFont.h" + + +#ifdef _WIN32//for glut.h +#include +#endif + +//think different +#if defined(__APPLE__) && !defined (VMDMESA) +#include +#if (defined (TARGET_OS_IPHONE) && TARGET_OS_IPHONE) || (defined (TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR) +#import +#define glOrtho glOrthof +#else +#include +#include +#include +#endif +#else + + + +#ifdef _WINDOWS +#include +#include +#include +#else +#include +#include +#endif +#endif + +#include +#include //for memset + +extern unsigned char sFontData[]; +static bool sTexturesInitialized = false; + +static GLuint sTexture = -1; +static int sScreenWidth = -1; +static int sScreenHeight = -1; + + +void GLDebugResetFont(int screenWidth,int screenHeight) +{ + + if ((sScreenWidth == screenWidth) && (sScreenHeight == screenHeight)) + return; + + sScreenWidth = screenWidth; + sScreenHeight = screenHeight; + + if (!sTexturesInitialized) + { + sTexturesInitialized = true; + glGenTextures(1, &sTexture); + glBindTexture(GL_TEXTURE_2D, sTexture); + glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR); + glTexImage2D(GL_TEXTURE_2D, 0, 3, 256 , 256 , 0, GL_RGB, GL_UNSIGNED_BYTE, &sFontData[0]); + } + + printf("generating font at resolution %d,%d\n",screenWidth,screenHeight); + +} + +#define USE_ARRAYS 1 + +void GLDebugDrawStringInternal(int x,int y,const char* string, const btVector3& rgb) +{ + GLDebugDrawStringInternal(x,y,string,rgb,true,10); +} + +void GLDebugDrawStringInternal(int x,int y,const char* string, const btVector3& rgb, bool enableBlend, int spacing) +{ + + if (!sTexturesInitialized) + { + 
GLDebugResetFont(sScreenWidth,sScreenHeight); + } + if (strlen(string)) + { + + glColor4f(rgb.getX(),rgb.getY(),rgb.getZ(),1.f); + float cx; + float cy; + + glMatrixMode(GL_TEXTURE); + glLoadIdentity(); + + glDisable(GL_TEXTURE_GEN_S); + glDisable(GL_TEXTURE_GEN_T); + glDisable(GL_TEXTURE_GEN_R); + + glEnable(GL_TEXTURE_2D); + glBlendFunc(GL_SRC_ALPHA,GL_ONE); + glDepthFunc (GL_LEQUAL); + + if (enableBlend) + { + glEnable(GL_BLEND); + } else + { + glDisable(GL_BLEND); + } + glEnable (GL_DEPTH_TEST); + glBindTexture(GL_TEXTURE_2D, sTexture); + glDisable(GL_DEPTH_TEST); + glMatrixMode(GL_PROJECTION); + glPushMatrix(); + glLoadIdentity(); + + glOrtho(0,sScreenWidth,0,sScreenHeight,-1,1); + + glMatrixMode(GL_MODELVIEW); + glPushMatrix(); + glLoadIdentity(); + glTranslatef(btScalar(x),btScalar(sScreenHeight - y),btScalar(0)); + +#if USE_ARRAYS + + glDisableClientState(GL_COLOR_ARRAY); + glDisableClientState(GL_NORMAL_ARRAY); + glEnableClientState(GL_VERTEX_ARRAY); + glEnableClientState (GL_TEXTURE_COORD_ARRAY); +#endif + + GLfloat verts[] ={ + 0.0f, 1.0f, 0.0f, + -1.0f, -1.0f, 0.0f, + 1.0f, -1.0f, 0.0f, + 0.f,0.f,0.f + }; + + GLfloat uv_texcoords[] = { + 0,0, + 0,0, + 0,0, + 0,0 + }; + verts[0] = 0; verts[1] = 0; verts[2] = 0; + verts[3] = 16-1; verts[4] = 0; verts[5] = 0; + verts[6] = 16-1; verts[7] = 16-1; verts[8] = 0; + verts[9] = 0; verts[10] = 16-1; verts[11] = 0; + + for (int i=0;i=0) + { + cx=float(ch%16) * btScalar(1./16.f); + cy=float(ch/16) * btScalar(1./16.f); + + uv_texcoords[0] = cx; uv_texcoords[1] = btScalar(1-cy-1./16.f); + uv_texcoords[2] = btScalar(cx+1./16.f); uv_texcoords[3] = btScalar(1-cy-1./16.f); + uv_texcoords[4] = btScalar(cx+1./16.f); uv_texcoords[5] = btScalar(1-cy); + uv_texcoords[6] = cx; uv_texcoords[7] = btScalar(1-cy); +#if USE_ARRAYS + glTexCoordPointer(2,GL_FLOAT,0,uv_texcoords); + glVertexPointer(3, GL_FLOAT, 0, verts); + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); +#else + glBegin(GL_QUADS); + glTexCoord2f(cx,1-cy-1./16.f); + + 
glVertex2i(0,0); + glTexCoord2f(cx+1./16.f,1-cy-1./16.f); + + glVertex2i(16 - 1,0); + glTexCoord2f(cx+1./16.f,1-cy); + + glVertex2i(16 - 1,16 -1); + glTexCoord2f(cx,1-cy); + + glVertex2i(0,16 -1); + glEnd(); +#endif + + glTranslatef(spacing,0,0); + } + } + + glMatrixMode(GL_PROJECTION); + glPopMatrix(); + glMatrixMode(GL_MODELVIEW); + glPopMatrix(); +#if 1 + glEnable(GL_DEPTH_TEST); + glBlendFunc(GL_SRC_ALPHA,GL_ONE); + glDepthFunc (GL_LEQUAL); + glDisable(GL_BLEND); + glDisable(GL_TEXTURE_2D); + + glMatrixMode(GL_TEXTURE); + glLoadIdentity(); + glScalef(btScalar(0.025),btScalar(0.025),btScalar(0.025)); +#endif + glMatrixMode(GL_MODELVIEW); +#if USE_ARRAYS + glDisableClientState(GL_VERTEX_ARRAY); + glDisableClientState (GL_TEXTURE_COORD_ARRAY); +#endif + //glDisable(GL_TEXTURE_2D); + } +} + +void GLDebugDrawString(int x,int y,const char* string) +{ + + btVector3 rgb(1,1,1); + GLDebugDrawStringInternal(x,y,string,rgb); +} + + +unsigned char sFontData[] = +{ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,145,145,145,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,213,213,213,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,103,103,103,2,2,2,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213, + 
213,213,255,255,255,255,255,255,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,246,246,246,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,246,246,246,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,213,213,213,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,213,213,213,255,255,255,255,255,255,255,255,255,213,213,213,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,178,178,178,178,178,178,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,0,0,0,0,0,0,0,0,0,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,103,103,103,103,103,103,103,103,103,178,178,178,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,145,145,145,178,178,178,255,255,255,255,255,255,255,255,255,145,145,145,103,103,103,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,0,0,0,2,2,2,255,255,255,178,178,178,103,103,103,145,145,145,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,103,103,103,246,246,246,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,103,103,103,246,246,246,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, + 255,255,145,145,145,103,103,103,246,246,246,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,213,213,213,178,178,178,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,213,213,213,178,178,178,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,145,145,145,255,255,255,255,255,255,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,103,103,103,103,103,103,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,145,145,145,178,178,178,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,103,103,103,145,145,145,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,0,0,0,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,70,70,70,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70, + 70,70,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,255,255,255,70,70, + 
70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,213,213,213,255,255,255,255,255,255,0,0,0,0,0,0,70,70,70,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,103,103,103,255,255,255,213,213,213,70,70,70,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,255,255,255,246,246,246,178,178,178,246,246,246,70,70,70,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37, + 37,37,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,178,178,178,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,255,255,255,213,213, + 
213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,255,255,255,255,255,255,103,103,103,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,255,255,255,103,103,103,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,178,178,178,255,255,255,37,37,37,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,255,255,255,255,255,255,145,145,145,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,2,2,2,145,145,145,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, + 255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,103,103,103,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,145,145,145,255,255, + 
255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,70,70,70,255,255,255,70,70,70,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,213,213,213,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,70,70,70,255,255,255,37,37,37,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,103,103,103,246,246,246,255,255,255,255,255,255,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,70,70,70,255,255,255,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,255,255,255,145,145,145,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,213,213,213,0,0,0,37,37,37,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,213,213,213,0,0,0,37,37,37,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,70,70,70,255,255, + 255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,103,103,103,213,213,213,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,37,37,37,255,255,255,178,178,178,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,70,70,70,0,0,0,70,70,70,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,145,145,145,213,213,213,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,70,70,70,255,255,255,37,37,37,0,0,0,145,145,145,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,178,178,178,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,0,0,0,103,103,103,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,0,0,0,103,103,103,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255, + 
255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,255,255,255,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,255,255,255,213,213,213,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,103,103,103,103,103,103,103,103,103,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,255,255,255,246,246,246,103,103,103,246,246,246,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,37,37,37,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,246,246,246,103,103,103,246,246,246,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,103,103,103,70,70,70,0,0,0,103,103,103,255,255, + 
255,246,246,246,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,145,145,145,103,103,103,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,2,2,2,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,37,37,37,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,2,2,2,255,255,255,255,255, + 
255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,178,178,178,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,255,255,255,246,246, + 
246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,103,103,103,255,255, + 
255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,246,246,246,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,213,213,213,213,213,213,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,213,213,213,213,213,213,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255, + 255,255,255,255,255,213,213,213,213,213,213,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,178,178,178,213,213,213,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,255,255,255,178,178,178,2,2,2,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,103,103,103,145,145,145,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,213,213,213,178,178,178,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,103,103,103,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,178, + 
178,178,103,103,103,255,255,255,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,145,145,145,145,145,145,255,255,255,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,145,145,145,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,103,103,103,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,103,103,103,103,103,103,178,178,178,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,103,103,103,178,178,178,255,255,255,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,178,178,178,255,255,255,178,178,178,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,178,178,178,255,255,255,178,178,178,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,178,178,178,255,255,255,178,178,178,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,246,246,246,255,255,255,145,145,145,0,0,0,37,37,37,246,246,246,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,145,145,145,0,0,0,37,37,37,246,246,246,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,2, + 2,2,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,2,2,2,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103, + 
103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,103,103,103,103,103,103,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255, + 
255,255,255,255,255,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,103,103,103,103,103,103,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,103,103,103,103,103,103,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,103,103,103,103,103,103,255,255,255,178,178, + 178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,103,103,103,103,103,103,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,103,103,103,103,103,103,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37, + 37,37,103,103,103,103,103,103,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,103,103,103,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246, + 246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,213,213,213,0,0,0,37,37,37,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 255,255,246,246,246,103,103,103,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,103,103,103,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,103,103, + 
103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,2,2,2,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,2,2,2,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,0,0,0,103,103,103,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103, + 103,103,246,246,246,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,246,246,246,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,103,103,103,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,103,103,103,145,145,145,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,213,213,213,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,213,213,213,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,70,70,70,70,70,70,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,37,37,37,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,37,37,37,70,70,70,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,178,178,178,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,246,246,246,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,246,246,246,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,145,145,145,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,213,213,213,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,213,213,213,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,213,213,213,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,213,213,213,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246, + 246,246,255,255,255,255,255,255,246,246,246,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,145,145,145,178,178,178,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,178,178,178,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,255,255,255,178,178,178,103,103,103,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,246,246,246,103,103,103,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,213,213,213,103,103,103,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255, + 255,255,213,213,213,178,178,178,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,178,178,178,0,0,0,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,178,178,178,255,255,255,255,255,255,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,103,103,103,103,103,103,178,178,178,255,255,255,2,2, + 
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,70,70,70,0,0,0,0,0,0,37,37,37,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,0,0,0,2,2,2,37,37,37,145,145,145,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255, + 255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,70,70,70,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,145,145,145,255,255,255,103,103,103,255,255,255,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,178,178,178,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,246,246,246,0,0,0,0,0,0,246,246,246,70,70, + 
70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,2,2,2,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 255,255,2,2,2,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,103,103,103,255,255,255,255,255,255,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,178,178,178,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,70,70,70,2,2,2,2,2,2,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 255,255,103,103,103,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,178,178,178,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,255,255,255,255,255,255,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,103,103,103,178,178,178,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,103,103,103,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2,2,0,0,0,145,145,145,255,255,255,103,103,103,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,178,178,178,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,103,103,103,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,246,246,246,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,255,255,255,246,246,246,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,145,145,145,103,103,103,178,178,178,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,103,103,103,103,103,103,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255, + 
255,255,213,213,213,0,0,0,37,37,37,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,213,213,213,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,246,246,246,70,70,70,103,103,103,246,246,246,0,0,0,213,213,213,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,103,103,103,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,103,103,103,103,103,103,255,255,255,255,255,255,246,246, + 246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,213,213,213,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,145,145,145,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,70,70,70,178,178,178,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255, + 255,255,255,255,255,0,0,0,103,103,103,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,0,0,0,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 
255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,145,145,145,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,103,103,103,103,103,103,145,145,145,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,178,178,178,103,103,103,178,178,178,255,255,255,145,145,145,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,213,213,213,213,213,213,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,213,213,213,213,213,213,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,213,213,213,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,213,213,213,213,213,213,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213, + 213,213,255,255,255,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,213,213,213,145,145,145,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,103,103,103,255,255,255,255,255,255,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,213,213,213,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,255,255,255,145,145,145,178,178,178,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,178,178,178,255,255,255,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246, + 
246,246,103,103,103,103,103,103,103,103,103,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,178,178,178,255,255,255,178,178,178,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,178,178,178,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,103,103,103,0,0,0,178,178,178,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,178,178,178,255,255,255,178,178,178,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,0,0,0,70,70,70,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,178,178,178,255,255,255,178,178,178,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,37,37,37,178,178,178,255,255,255,37,37,37,178,178,178,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,103,103,103,0,0,0,70,70,70,178,178,178,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,213,213,213,103,103,103,103,103,103,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,70,70,70,0,0,0,0,0,0,37,37,37,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,103,103,103,255,255,255,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246, + 246,246,103,103,103,103,103,103,103,103,103,103,103,103,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,255,255,255,2,2,2,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,103,103,103,103,103,103,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255, + 255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,255,255,255,103,103,103,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,103,103,103,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,103,103,103,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 255,255,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,103,103,103,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,103,103,103,213,213,213,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,246,246,246,255,255,255,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,213,213,213,255,255,255,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,37,37,37,255,255,255,255,255,255,70,70,70,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,103,103,103,145,145,145,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,178,178,178,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,213,213,213,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,145,145,145,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,145,145,145,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,178,178,178,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,178,178,178,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,37,37, + 37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,246,246,246,255,255,255,145,145,145,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,213,213,213,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246, + 246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255, + 255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,246,246,246,178,178,178,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,103,103,103,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255, + 
255,255,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,145,145,145,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,103,103,103,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,103,103,103,0,0,0,103,103,103,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,103,103,103,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,255,255,255,178,178,178,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, + 
255,255,145,145,145,103,103,103,178,178,178,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,213,213,213,255,255,255,246,246,246,2,2,2,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,255,255,255,178,178,178,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,103,103,103,103,103,103,178,178,178,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37, + 
37,37,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,2,2,2,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,145,145,145,0,0,0,0,0,0,255,255,255,103,103, + 103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,103,103,103,103,103,103,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2, + 
2,2,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,37,37,37,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,255,255,255,2,2,2,246,246,246,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,70,70,70,0,0,0,145,145,145,103,103, + 103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,213,213,213,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,246,246,246,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103, + 
103,103,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,103,103,103,246,246,246,255,255,255,103,103,103,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,70,70,70,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,2,2,2,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,145,145,145,2,2,2,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,70,70,70,255,255,255,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213, + 
213,213,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,255,255,255,255,255,255,2,2,2,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,70,70,70,0,0,0,255,255,255,255,255,255,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,37,37,37,0,0,0,255,255,255,37,37,37,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255, + 
255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,103,103,103,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,37,37,37,0,0,0,103,103,103,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,255,255,255,255,255,255,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,103,103,103,103,103,103,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,246,246,246,103,103,103,246,246,246,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,103,103,103,103,103,103,103,103,103,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,103,103,103,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,255,255,255,246,246,246,103,103,103,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255, + 
255,255,103,103,103,103,103,103,0,0,0,70,70,70,103,103,103,255,255,255,246,246,246,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,246,246,246,2,2,2,0,0,0,0,0,0,213,213,213,255,255,255,213,213,213,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,0,0,0,213,213,213,255,255,255,145,145,145,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,145,145,145,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,103,103,103,213,213,213,255,255,255,255,255, + 255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, + 255,255,255,255,255,255,255,255,2,2,2,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,37,37,37,178,178,178,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 
255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,213,213,213,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,213,213,213,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,213,213,213,103,103,103,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,103,103,103,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,145,145,145,0,0,0,0,0,0,37,37,37,246,246,246,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,103,103,103,103,103,103,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,178,178,178,103,103,103,246,246,246,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,103,103,103,103,103,103,145,145,145,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255, + 
255,255,103,103,103,103,103,103,103,103,103,178,178,178,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,103,103,103,103,103,103,213,213,213,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,103,103,103,0,0,0,178,178,178,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,178,178,178,255,255,255,178,178,178,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,145,145,145,103,103,103,255,255,255,255,255,255,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,103,103,103,70,70,70,0,0,0,37,37,37,255,255,255,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,103,103,103,103,103,103,103,103,103,178,178,178,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,246,246,246,255,255,255,103,103,103,0,0,0,2,2,2,178,178,178,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,246,246,246,255,255,255,103,103,103,0,0,0,70,70,70,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,103,103,103,145,145,145,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,103,103,103,255,255,255,255,255,255,178,178,178,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255, + 255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,70,70,70,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,145,145,145,178,178,178,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,246,246,246,255,255,255,2,2,2,213,213,213,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,103,103,103,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,103,103,103,103,103,103,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 255,255,178,178,178,103,103,103,255,255,255,213,213,213,70,70,70,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,103,103,103,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,103,103,103,103,103,103,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,2,2,2,178,178,178,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,246,246,246,255,255,255,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,255,255,255,213,213,213,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,37,37,37,255,255,255,178,178,178,37,37,37,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,246,246,246,255,255,255,178,178,178,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,70,70,70,255,255,255,70,70,70,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,145,145,145,103,103,103,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246, + 246,246,213,213,213,0,0,0,70,70,70,255,255,255,37,37,37,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,70,70,70,255,255,255,37,37,37,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,246,246,246,103,103,103,255,255,255,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,213,213,213,255,255,255,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145, + 145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37, + 37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,2,2,2,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,246,246,246,2,2,2,103,103,103,246,246,246,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,246,246,246,103,103,103,103,103,103,246,246,246,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,213,213,213,255,255,255,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,103,103,103,103,103,103,103,103,103,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,103,103,103,103,103,103,103,103,103,246,246,246,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213, + 213,213,255,255,255,103,103,103,103,103,103,103,103,103,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,103,103,103,103,103,103,103,103,103,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,103,103,103,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,103,103,103,0,0,0,103,103,103,255,255,255,246,246,246,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,103,103,103,255,255,255,246,246,246,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,255,255,255,246,246, + 
246,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,103,103,103,103,103,103,0,0,0,255,255,255,255,255,255,145,145,145,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,213,213,213,255,255,255,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,2,2,2,0,0,0,255,255,255,255,255,255,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,70,70,70,0,0,0,103,103,103,255,255,255,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,145,145,145,103,103,103,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255, + 
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,70,70,70,246,246,246,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,2,2,2,0,0,0,103,103,103,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255, + 
255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,178,178,178,255,255,255,178,178,178,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,103,103,103,178,178,178,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,103,103,103,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,255,255,255,178,178,178,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213, + 
213,213,103,103,103,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,103,103,103,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,103,103,103,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,246,246,246,0,0,0,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,213,213,213,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,103,103,103,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,37,37,37,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,213,213,213,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,213,213,213,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,178,178,178,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246, + 
246,246,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,246,246,246,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,178,178,178,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145, + 
145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,37,37,37,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,37,37,37,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,246,246,246,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,103,103,103,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37, + 
37,37,255,255,255,103,103,103,103,103,103,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,103,103,103,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,103,103,103,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,213,213,213,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,213,213,213,103,103,103,103,103,103,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,246,246,246,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,178,178,178,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,246,246,246,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,246,246,246,213,213,213,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,145,145,145,213,213,213,255,255,255,255,255,255,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,178, + 
178,178,0,0,0,0,0,0,213,213,213,213,213,213,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,2,2,2,178,178,178,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,178,178,178,255,255,255,178,178,178,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,145,145,145,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103, + 
103,103,255,255,255,178,178,178,0,0,0,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,103,103,103,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,246,246,246,246,246,246,255,255,255,255,255,255,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,2,2,2,178,178,178,255,255,255,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,213,213,213,37,37,37,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,213,213,213,255,255,255,37,37, + 37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,103,103,103,213,213,213,255,255,255,145,145,145,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255, + 
255,255,255,255,255,145,145,145,0,0,0,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,37,37,37,103,103,103,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,145,145,145,70,70,70,2,2,2,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255, + 
255,255,0,0,0,255,255,255,2,2,2,0,0,0,2,2,2,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,103,103,103,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,145,145,145,103,103,103,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255, + 
255,255,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,2,2,2,255,255,255,145,145,145,70,70, + 70,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,2,2,2,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70, + 
70,70,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,145,145,145,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,178,178,178,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,37,37,37,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,255,255, + 255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,246,246,246,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37, + 
37,37,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,37,37,37,103,103,103,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,37,37,37,103,103,103,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0, + 0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,213,213,213,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,37,37,37,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,2,2,2,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,246,246,246,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,213,213,213,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0, + 0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,37,37,37,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0, + 0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,2,2,2,255,255,255,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,246,246,246,103,103,103,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0, + 0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,103,103,103,246,246,246,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,246,246,246,246,246,246,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,2,2,2,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,103,103,103,255,255,255,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,246,246,246,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0, + 0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,213,213,213,246,246,246,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,145,145,145,246,246,246,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,103,103,103,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,103,103,103,255,255,255,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37, + 
37,37,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0, + 0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,37,37,37,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,145,145,145,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,37,37,37,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0, + 0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0, + 0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,37,37,37,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,70,70,70,0,0,0,2,2,2,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,37,37,37,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0, + 0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,145,145,145,0,0,0,70,70,70,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,178,178,178,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,0,0,0,0,0,0,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0, + 0,145,145,145,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,213,213,213,145,145,145,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,70,70,70,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,255,255, + 255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,159,159,159,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,246,246,246,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,37,37,37,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,37,37,37,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,255,255,255,255,255,255,37,37,37,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,37,37,37,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,255,255,255,255,255,255,255,255, + 255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,37,37,37,103,103,103,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,213,213,213,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,213,213,213,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,246,246,246,0,0,0,0,0,0,213,213,213,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,213,213,213,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,178,178,178,0,0,0,0,0,0,0,0, + 0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,37,37,37,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,37,37,37,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103, + 
103,103,255,255,255,37,37,37,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,37,37,37,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,246,246,246,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,178,178,178,246,246,246,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,178,178,178,246,246,246,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,2,2,2,178,178,178,246,246,246,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,178,178,178,246,246,246,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,246,246,246,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,246,246,246,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,246,246,246,246,246,246,255,255, + 255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,178,178,178,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,178,178,178,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,178,178,178,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,178,178,178,0,0,0,0,0,0,0,0,0,145,145, + 145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,70,70,70,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255, + 255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,145,145,145,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,145,145,145,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,213,213,213,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,178,178, + 178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,70,70,70,0,0,0,0,0,0,2,2,2,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,37,37,37,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,70,70,70,0,0,0,2,2,2,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,37,37,37,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,2,2,2,0,0,0,70,70,70,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,37,37,37,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,0,0,0,0,0,0,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,145,145,145,0,0,0,70,70,70,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,0,0,0,0,0,0,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,246,246,246,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,70,70,70,0,0,0,145,145,145,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,145,145,145,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,213,213,213,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,70,70,70,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,103,103,103,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,103,103,103,37,37,37,246,246,246,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,255,255,255,37,37,37,103,103,103,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,70,70,70,0,0,0,37,37,37,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,213,213,213,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,145,145,145,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,213,213,213,0,0,0,145,145,145,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,213,213,213,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,255,255,255,0,0,0,255,255,255,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,145,145,145,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,246,246,246,0,0,0,255,255,255,0,0,0,213,213,213,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,213,213,213,0,0,0,103,103,103,246,246,246,255,255,255,0,0,0,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178, + 178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,255,255,255,37,37,37,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,2,2,2,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,255,255,255,70,70,70,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,2,2,2,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255, + 255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,70,70,70,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,246,246,246,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,37,37,37,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,0,0,0,0,0,0,213,213,213,255,255,255,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,37,37,37,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,246,246,246,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,37,37,37,246,246,246,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,70,70,70,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,246,246,246,103,103,103,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,213,213,213,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,37,37,37,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,103,103,103,37,37,37,246,246,246,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37, + 
37,37,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,103,103,103,37,37,37,255,255,255,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,246,246,246,103,103,103,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,246,246,246,103,103,103,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,37,37,37,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,178,178,178,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,246,246,246,103,103,103,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,178,178,178,246,246,246,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, + 
255,255,255,255,255,246,246,246,246,246,246,246,246,246,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,246,246,246,103,103,103,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213, + 
213,213,178,178,178,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,213,213,213,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,37,37,37,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,246,246,246,246,246,246,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,255,255,255,37,37,37,70,70,70,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,70,70,70,0,0,0,103,103,103,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,2,2,2,0,0,0,103,103,103,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,246,246,246,2,2,2,145,145,145,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,70,70,70,255,255,255,246,246,246,178,178,178,213,213,213,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,246,246,246,2,2,2,145,145,145,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,37,37,37,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,145,145,145,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,2,2,2,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,178,178,178,145,145,145,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,37,37,37,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,246,246,246,255,255,255,0,0,0,178,178,178,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,37,37,37,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,246,246,246,103,103,103,103,103,103,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,246,246,246,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,37,37,37,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,246,246,246,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,255,255,255,255,255,255,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,37,37,37,0,0,0,145,145,145,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,103,103,103,0,0,0,246,246,246,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,246,246,246,0,0,0,255,255,255,213,213,213,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,37,37,37,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,255,255,255,0,0,0,255,255,255,2,2,2,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,255,255,255,103,103,103,255,255,255,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,70,70,70,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,246,246,246,255,255,255,178,178,178,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,178,178,178,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,103,103,103,255,255,255,103,103,103,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,37,37,37,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,37,37, + 37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,70,70,70,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,0,0,0,2,2,2,246,246,246,0,0,0,0,0,0,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255, + 255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,178,178,178,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255, + 255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,2,2,2,103,103,103,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,37,37,37,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,213,213,213,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,178,178,178,255,255,255,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,178,178, + 178,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,213,213,213,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,145,145,145,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,246,246,246,246,246,246,246,246,246,246,246,246,246,246, + 246,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,103,103,103,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,246,246,246,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,246,246,246,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,37,37, + 37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255, + 255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,103,103,103,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,213,213,213,0,0,0,0,0,0,103,103,103,0,0,0,70,70,70,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255, + 255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,2,2,2,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,255,255,255,178,178,178,178,178,178,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255, + 255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,103,103,103,255,255,255,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,2,2,2,255,255,255,0,0,0,178,178,178,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255, + 255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,70,70,70,255,255,255,255,255,255,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,255,255,255,246,246,246,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,2,2,2,246,246,246,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,255,255,255,246,246,246,246,246,246,246,246,246,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,246,246,246,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255, + 255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,213,213,213,178,178,178,255,255,255,70,70,70,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,103,103,103,255,255,255,2,2,2,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,145,145,145,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,178,178,178,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255, + 255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,255,255,255,2,2,2,103,103,103,213,213,213,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,255,255,255,178,178,178,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,103,103,103,0,0,0,145,145,145,103,103,103,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255, + 255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,255,255,255,0,0,0,0,0,0,255,255,255,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,246,246,246,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,103,103,103,246,246,246,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255, + 255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,37,37,37,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,70,70,70,0,0,0,0,0,0,2,2,2,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,70,70,70,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,178,178,178,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,178,178,178,255,255,255,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,103,103,103,255,255, + 255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,246,246,246,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,213,213,213,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,246,246,246,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246, + 246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,213,213,213,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,70,70,70,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,246,246,246,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,246,246,246,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,246,246,246,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,246,246,246,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,178,178,178,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,103,103,103,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,246,246,246,255,255,255,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,70,70,70,103,103,103,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,37,37,37,246,246,246,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,246,246,246,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,213,213,213,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,178,178,178,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,103,103,103,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,178,178,178,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,246,246,246,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,213,213,213,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,2,2,2,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,246,246,246,246,246,246,103,103,103,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,103,103,103,103,103,103,145,145,145,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,2,2,2,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,2,2,2,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,213,213,213,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,255,255,255,255,255,255,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,145,145,145,246,246,246,246,246,246,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,255,255,255,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,70,70,70,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,255,255,255,255,255,255,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,178, + 
178,178,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,255,255,255,255,255,255,103,103,103,145,145,145,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,2,2,2,0,0,0,255,255,255,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246, + 
246,246,37,37,37,0,0,0,37,37,37,246,246,246,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,103,103,103,0,0,0,0,0,0,246,246,246,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,103,103,103,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,255,255,255,0,0,0,103,103,103,103,103,103,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,246,246,246,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,255,255,255,246,246,246,255,255,255,255,255,255,246,246,246,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,70,70,70,246,246,246,37,37,37,246,246,246,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,246,246,246,255,255,255,0,0,0,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,246,246,246,255,255,255,255,255,255,246,246,246,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,103,103,103,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,213,213,213,103,103,103,145,145,145,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,70,70,70,255,255,255,2,2,2,0,0,0,246,246,246,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,255,255,255,103,103,103,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,37,37,37,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,2,2,2,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,255,255,255,246,246,246,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,255,255, + 255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,246,246,246,246,246,246,103,103,103,255,255,255,178,178,178,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255, + 
255,255,145,145,145,255,255,255,37,37,37,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,255,255,255,145,145,145,178,178, + 178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,255,255,255,103,103,103,213,213,213,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0, + 
0,0,0,0,0,255,255,255,255,255,255,145,145,145,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145,145,145,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,2,2,2,0,0,0,70,70, + 70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,2,2,2,0,0,0,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,255,255,255,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0, + 
0,0,0,0,0,103,103,103,103,103,103,0,0,0,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,103,103,103,2,2,2,70,70,70,70,70, + 70,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,70,70,70,0,0,0,178,178,178,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,70,70,70,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,255,255,255,103,103,103,255,255,255,2,2,2,103,103,103,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,213,213,213,103, + 
103,103,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,213,213,213,0,0,0,255,255,255,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,178,178,178,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,103,103,103,0,0,0,246,246,246,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,103,103,103,0,0,0,103,103,103,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,213,213,213,255,255,255,246,246,246,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178, + 
178,178,246,246,246,37,37,37,0,0,0,0,0,0,0,0,0,103,103,103,213,213,213,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,178,178,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,255,255,255,255,255,255,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,178,178,255,255,255,145,145,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,145,145,145,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,70,70,70,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,2,2,2,0,0,0,70,70,70,70,70,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,246,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,103,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,70,70,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugFont.h b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugFont.h new file mode 100644 index 000000000..bf2c2575d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GLDebugFont.h @@ -0,0 +1,29 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#ifndef BT_DEBUG_FONT_H +#define BT_DEBUG_FONT_H + +#include "LinearMath/btVector3.h" + + +void GLDebugDrawStringInternal(int x,int y,const char* string,const btVector3& rgb, bool enableBlend, int spacing); +void GLDebugDrawStringInternal(int x,int y,const char* string,const btVector3& rgb); +void GLDebugDrawString(int x,int y,const char* string); +void GLDebugResetFont(int screenWidth,int screenHeight); + +#endif //BT_DEBUG_FONT_H + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_ShapeDrawer.cpp b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_ShapeDrawer.cpp new file mode 100644 index 000000000..7beb19f05 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_ShapeDrawer.cpp @@ -0,0 +1,1058 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. +*/ + +#ifdef _WIN32 //needed for glut.h +#include +#endif +#include "GLDebugFont.h" + + + +#include "GlutStuff.h" +#include "GL_ShapeDrawer.h" +#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h" +#include "BulletCollision/CollisionShapes/btTriangleMeshShape.h" +#include "BulletCollision/CollisionShapes/btBoxShape.h" +#include "BulletCollision/CollisionShapes/btSphereShape.h" +#include "BulletCollision/CollisionShapes/btConeShape.h" +#include "BulletCollision/CollisionShapes/btCylinderShape.h" +#include "BulletCollision/CollisionShapes/btTetrahedronShape.h" +#include "BulletCollision/CollisionShapes/btCompoundShape.h" +#include "BulletCollision/CollisionShapes/btCapsuleShape.h" +#include "BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h" +#include "BulletCollision/CollisionShapes/btUniformScalingShape.h" +#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h" +#include "BulletCollision/CollisionShapes/btMultiSphereShape.h" +#include "BulletCollision/CollisionShapes/btConvexPolyhedron.h" + + +/// +#include "BulletCollision/CollisionShapes/btShapeHull.h" + +#include "LinearMath/btTransformUtil.h" + + +#include "LinearMath/btIDebugDraw.h" +//for debugmodes + +#include //printf debugging + +//#define USE_DISPLAY_LISTS 1 +#ifdef USE_DISPLAY_LISTS + +#include + +using namespace std; + +//Set for storing Display list per trimesh +struct TRIMESH_KEY +{ + btCollisionShape* m_shape; + GLuint m_dlist;//OpenGL display list +}; + +typedef map TRIMESH_KEY_MAP; + +typedef pair TRIMESH_KEY_PAIR; + +TRIMESH_KEY_MAP g_display_lists; + +class GlDisplaylistDrawcallback : public btTriangleCallback +{ +public: + + virtual void processTriangle(btVector3* triangle,int partId, int triangleIndex) + { + + btVector3 diff1 = triangle[1] - triangle[0]; + btVector3 diff2 = triangle[2] - triangle[0]; + btVector3 normal = diff1.cross(diff2); + + normal.normalize(); + + 
glBegin(GL_TRIANGLES); + glColor3f(1, 1, 1); + glNormal3d(normal.getX(),normal.getY(),normal.getZ()); + glVertex3d(triangle[0].getX(), triangle[0].getY(), triangle[0].getZ()); + + //glColor3f(0, 1, 0); + glNormal3d(normal.getX(),normal.getY(),normal.getZ()); + glVertex3d(triangle[1].getX(), triangle[1].getY(), triangle[1].getZ()); + + //glColor3f(0, 1, 0); + glNormal3d(normal.getX(),normal.getY(),normal.getZ()); + glVertex3d(triangle[2].getX(), triangle[2].getY(), triangle[2].getZ()); + glEnd(); + + /*glBegin(GL_LINES); + glColor3f(1, 1, 0); + glNormal3d(normal.getX(),normal.getY(),normal.getZ()); + glVertex3d(triangle[0].getX(), triangle[0].getY(), triangle[0].getZ()); + glNormal3d(normal.getX(),normal.getY(),normal.getZ()); + glVertex3d(triangle[1].getX(), triangle[1].getY(), triangle[1].getZ()); + glColor3f(1, 1, 0); + glNormal3d(normal.getX(),normal.getY(),normal.getZ()); + glVertex3d(triangle[2].getX(), triangle[2].getY(), triangle[2].getZ()); + glNormal3d(normal.getX(),normal.getY(),normal.getZ()); + glVertex3d(triangle[1].getX(), triangle[1].getY(), triangle[1].getZ()); + glColor3f(1, 1, 0); + glNormal3d(normal.getX(),normal.getY(),normal.getZ()); + glVertex3d(triangle[2].getX(), triangle[2].getY(), triangle[2].getZ()); + glNormal3d(normal.getX(),normal.getY(),normal.getZ()); + glVertex3d(triangle[0].getX(), triangle[0].getY(), triangle[0].getZ()); + glEnd();*/ + + + } +}; + +GLuint OGL_get_displaylist_for_shape(btCollisionShape * shape) +{ + TRIMESH_KEY_MAP::iterator map_iter; + + unsigned long key = (unsigned long)shape; + map_iter = g_display_lists.find(key); + if(map_iter!=g_display_lists.end()) + { + return map_iter->second.m_dlist; + } + + return 0; +} + +void OGL_displaylist_clean() +{ + TRIMESH_KEY_MAP::iterator map_iter,map_itend; + + map_iter = g_display_lists.begin(); + + while(map_iter!=map_itend) + { + glDeleteLists(map_iter->second.m_dlist,1); + map_iter++; + } + + g_display_lists.clear(); +} + + +void 
OGL_displaylist_register_shape(btCollisionShape * shape) +{ + btVector3 aabbMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT)); + btVector3 aabbMin(-btScalar(BT_LARGE_FLOAT),-btScalar(BT_LARGE_FLOAT),-btScalar(BT_LARGE_FLOAT)); + GlDisplaylistDrawcallback drawCallback; + TRIMESH_KEY dlist; + + dlist.m_dlist = glGenLists(1); + dlist.m_shape = shape; + + unsigned long key = (unsigned long)shape; + + g_display_lists.insert(TRIMESH_KEY_PAIR(key,dlist)); + + glNewList(dlist.m_dlist,GL_COMPILE); + +// glEnable(GL_CULL_FACE); + + glCullFace(GL_BACK); + + if (shape->isConcave()) + { + btConcaveShape* concaveMesh = (btConcaveShape*) shape; + //todo pass camera, for some culling + concaveMesh->processAllTriangles(&drawCallback,aabbMin,aabbMax); + } + +// glDisable(GL_CULL_FACE); + + glEndList(); +} +#endif //USE_DISPLAY_LISTS + +void GL_ShapeDrawer::drawCoordSystem() { + glBegin(GL_LINES); + glColor3f(1, 0, 0); + glVertex3d(0, 0, 0); + glVertex3d(1, 0, 0); + glColor3f(0, 1, 0); + glVertex3d(0, 0, 0); + glVertex3d(0, 1, 0); + glColor3f(0, 0, 1); + glVertex3d(0, 0, 0); + glVertex3d(0, 0, 1); + glEnd(); + +} + + + + + +class GlDrawcallback : public btTriangleCallback +{ + +public: + + bool m_wireframe; + + GlDrawcallback() + :m_wireframe(false) + { + } + + virtual void processTriangle(btVector3* triangle,int partId, int triangleIndex) + { + + (void)triangleIndex; + (void)partId; + + + if (m_wireframe) + { + glBegin(GL_LINES); + glColor3f(1, 0, 0); + glVertex3d(triangle[0].getX(), triangle[0].getY(), triangle[0].getZ()); + glVertex3d(triangle[1].getX(), triangle[1].getY(), triangle[1].getZ()); + glColor3f(0, 1, 0); + glVertex3d(triangle[2].getX(), triangle[2].getY(), triangle[2].getZ()); + glVertex3d(triangle[1].getX(), triangle[1].getY(), triangle[1].getZ()); + glColor3f(0, 0, 1); + glVertex3d(triangle[2].getX(), triangle[2].getY(), triangle[2].getZ()); + glVertex3d(triangle[0].getX(), triangle[0].getY(), triangle[0].getZ()); + glEnd(); + } else + { 
+ glBegin(GL_TRIANGLES); + //glColor3f(1, 1, 1); + + + glVertex3d(triangle[0].getX(), triangle[0].getY(), triangle[0].getZ()); + glVertex3d(triangle[1].getX(), triangle[1].getY(), triangle[1].getZ()); + glVertex3d(triangle[2].getX(), triangle[2].getY(), triangle[2].getZ()); + + glVertex3d(triangle[2].getX(), triangle[2].getY(), triangle[2].getZ()); + glVertex3d(triangle[1].getX(), triangle[1].getY(), triangle[1].getZ()); + glVertex3d(triangle[0].getX(), triangle[0].getY(), triangle[0].getZ()); + glEnd(); + } + } +}; + +class TriangleGlDrawcallback : public btInternalTriangleIndexCallback +{ +public: + virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int triangleIndex) + { + (void)triangleIndex; + (void)partId; + + + glBegin(GL_TRIANGLES);//LINES); + glColor3f(1, 0, 0); + glVertex3d(triangle[0].getX(), triangle[0].getY(), triangle[0].getZ()); + glVertex3d(triangle[1].getX(), triangle[1].getY(), triangle[1].getZ()); + glColor3f(0, 1, 0); + glVertex3d(triangle[2].getX(), triangle[2].getY(), triangle[2].getZ()); + glVertex3d(triangle[1].getX(), triangle[1].getY(), triangle[1].getZ()); + glColor3f(0, 0, 1); + glVertex3d(triangle[2].getX(), triangle[2].getY(), triangle[2].getZ()); + glVertex3d(triangle[0].getX(), triangle[0].getY(), triangle[0].getZ()); + glEnd(); + } +}; + + +void GL_ShapeDrawer::drawSphere(btScalar radius, int lats, int longs) +{ + int i, j; + for(i = 0; i <= lats; i++) { + btScalar lat0 = SIMD_PI * (-btScalar(0.5) + (btScalar) (i - 1) / lats); + btScalar z0 = radius*sin(lat0); + btScalar zr0 = radius*cos(lat0); + + btScalar lat1 = SIMD_PI * (-btScalar(0.5) + (btScalar) i / lats); + btScalar z1 = radius*sin(lat1); + btScalar zr1 = radius*cos(lat1); + + glBegin(GL_QUAD_STRIP); + for(j = 0; j <= longs; j++) { + btScalar lng = 2 * SIMD_PI * (btScalar) (j - 1) / longs; + btScalar x = cos(lng); + btScalar y = sin(lng); + glNormal3f(x * zr1, y * zr1, z1); + glVertex3f(x * zr1, y * zr1, z1); + glNormal3f(x * zr0, y * zr0, z0); + 
glVertex3f(x * zr0, y * zr0, z0); + } + glEnd(); + } +} + +void GL_ShapeDrawer::drawCylinder(float radius,float halfHeight, int upAxis) +{ + + + glPushMatrix(); + switch (upAxis) + { + case 0: + glRotatef(-90.0, 0.0, 1.0, 0.0); + glTranslatef(0.0, 0.0, -halfHeight); + break; + case 1: + glRotatef(-90.0, 1.0, 0.0, 0.0); + glTranslatef(0.0, 0.0, -halfHeight); + break; + case 2: + + glTranslatef(0.0, 0.0, -halfHeight); + break; + default: + { + btAssert(0); + } + + } + + GLUquadricObj *quadObj = gluNewQuadric(); + + //The gluCylinder subroutine draws a cylinder that is oriented along the z axis. + //The base of the cylinder is placed at z = 0; the top of the cylinder is placed at z=height. + //Like a sphere, the cylinder is subdivided around the z axis into slices and along the z axis into stacks. + + gluQuadricDrawStyle(quadObj, (GLenum)GLU_FILL); + gluQuadricNormals(quadObj, (GLenum)GLU_SMOOTH); + + gluDisk(quadObj,0,radius,15, 10); + + gluCylinder(quadObj, radius, radius, 2.f*halfHeight, 15, 10); + glTranslatef(0.0f, 0.0f, 2.f*halfHeight); + glRotatef(-180.0f, 0.0f, 1.0f, 0.0f); + gluDisk(quadObj,0.f,radius,15, 10); + + glPopMatrix(); + gluDeleteQuadric(quadObj); +} + +GL_ShapeDrawer::ShapeCache* GL_ShapeDrawer::cache(btConvexShape* shape) +{ + ShapeCache* sc=(ShapeCache*)shape->getUserPointer(); + if(!sc) + { + sc=new(btAlignedAlloc(sizeof(ShapeCache),16)) ShapeCache(shape); + + m_shapecaches.push_back(sc); + shape->setUserPointer(sc); + + const btConvexPolyhedron* poly = shape->isPolyhedral() ? 
((btPolyhedralConvexShape*) shape)->getConvexPolyhedron() : 0; + if (poly) + { + int i; + /* Build edges */ + const int nv= poly->m_vertices.size(); + if (nv) + { + const btVector3* pv=&poly->m_vertices[0]; + btAlignedObjectArray edges; + edges.resize(nv*nv,0); + + int maxIndices = 0; + for (i=0;im_faces.size();i++) + { + maxIndices += poly->m_faces[i].m_indices.size(); + } + sc->m_edges.reserve(maxIndices); + + for (i=0;im_faces.size();i++) + { + int numVerts = poly->m_faces[i].m_indices.size(); + if (numVerts>2) + { + int index0 = poly->m_faces[i].m_indices[0]; + int index1 = poly->m_faces[i].m_indices[1]; + int index2 = poly->m_faces[i].m_indices[2]; + int j = poly->m_faces[i].m_indices.size()-1; + const btVector3 nrm=btCross(pv[index1]-pv[index0],pv[index2]-pv[index0]).normalized(); + + for (int v=0;vm_faces[i].m_indices.size();j=v++) + { + { + const unsigned int a=poly->m_faces[i].m_indices[j]; + const unsigned int b=poly->m_faces[i].m_indices[v]; + int edgeIndex = btMin(a,b)*nv+btMax(a,b); + ShapeCache::Edge*& e=edges[edgeIndex]; + if(!e) + { + sc->m_edges.push_back(ShapeCache::Edge()); + e=&sc->m_edges[sc->m_edges.size()-1]; + e->n[0]=nrm;e->n[1]=-nrm; + e->v[0]=a;e->v[1]=b; + } + else + { + e->n[1]=nrm; + } + } + } + } + } + } + + } else + { + + sc->m_shapehull.buildHull(shape->getMargin()); + + + /* Build edges */ + const int ni=sc->m_shapehull.numIndices(); + const int nv=sc->m_shapehull.numVertices(); + const unsigned int* pi=sc->m_shapehull.getIndexPointer(); + const btVector3* pv=sc->m_shapehull.getVertexPointer(); + btAlignedObjectArray edges; + sc->m_edges.reserve(ni); + edges.resize(nv*nv,0); + for(int i=0;im_edges.push_back(ShapeCache::Edge()); + e=&sc->m_edges[sc->m_edges.size()-1]; + e->n[0]=nrm;e->n[1]=-nrm; + e->v[0]=a;e->v[1]=b; + } + else + { + e->n[1]=nrm; + } + } + } + } + } + return(sc); +} + +void renderSquareA(float x, float y, float z) +{ + glBegin(GL_LINE_LOOP); + glVertex3f(x, y, z); + glVertex3f(x + 10.f, y, z); + glVertex3f(x + 
10.f, y + 10.f, z); + glVertex3f(x, y + 10.f, z); + glEnd(); +} + +inline void glDrawVector(const btVector3& v) { glVertex3d(v[0], v[1], v[2]); } + + +void GL_ShapeDrawer::drawOpenGL(btScalar* m, const btCollisionShape* shape, const btVector3& color,int debugMode,const btVector3& worldBoundsMin,const btVector3& worldBoundsMax) +{ + + if (shape->getShapeType() == CUSTOM_CONVEX_SHAPE_TYPE) + { + btVector3 org(m[12], m[13], m[14]); + btVector3 dx(m[0], m[1], m[2]); + btVector3 dy(m[4], m[5], m[6]); +// btVector3 dz(m[8], m[9], m[10]); + const btBoxShape* boxShape = static_cast(shape); + btVector3 halfExtent = boxShape->getHalfExtentsWithMargin(); + dx *= halfExtent[0]; + dy *= halfExtent[1]; +// dz *= halfExtent[2]; + glColor3f(1,1,1); + glDisable(GL_LIGHTING); + glLineWidth(2); + + glBegin(GL_LINE_LOOP); + glDrawVector(org - dx - dy); + glDrawVector(org - dx + dy); + glDrawVector(org + dx + dy); + glDrawVector(org + dx - dy); + glEnd(); + return; + } + else if((shape->getShapeType() == BOX_SHAPE_PROXYTYPE) && (debugMode & btIDebugDraw::DBG_FastWireframe)) + { + btVector3 org(m[12], m[13], m[14]); + btVector3 dx(m[0], m[1], m[2]); + btVector3 dy(m[4], m[5], m[6]); + btVector3 dz(m[8], m[9], m[10]); + const btBoxShape* boxShape = static_cast(shape); + btVector3 halfExtent = boxShape->getHalfExtentsWithMargin(); + dx *= halfExtent[0]; + dy *= halfExtent[1]; + dz *= halfExtent[2]; + glBegin(GL_LINE_LOOP); + glDrawVector(org - dx - dy - dz); + glDrawVector(org + dx - dy - dz); + glDrawVector(org + dx + dy - dz); + glDrawVector(org - dx + dy - dz); + glDrawVector(org - dx + dy + dz); + glDrawVector(org + dx + dy + dz); + glDrawVector(org + dx - dy + dz); + glDrawVector(org - dx - dy + dz); + glEnd(); + glBegin(GL_LINES); + glDrawVector(org + dx - dy - dz); + glDrawVector(org + dx - dy + dz); + glDrawVector(org + dx + dy - dz); + glDrawVector(org + dx + dy + dz); + glDrawVector(org - dx - dy - dz); + glDrawVector(org - dx + dy - dz); + glDrawVector(org - dx - dy + dz); + 
glDrawVector(org - dx + dy + dz); + glEnd(); + return; + } + + glPushMatrix(); + btglMultMatrix(m); + + + if (shape->getShapeType() == UNIFORM_SCALING_SHAPE_PROXYTYPE) + { + const btUniformScalingShape* scalingShape = static_cast(shape); + const btConvexShape* convexShape = scalingShape->getChildShape(); + float scalingFactor = (float)scalingShape->getUniformScalingFactor(); + { + btScalar tmpScaling[4][4]={{scalingFactor,0,0,0}, + {0,scalingFactor,0,0}, + {0,0,scalingFactor,0}, + {0,0,0,1}}; + + drawOpenGL( (btScalar*)tmpScaling,convexShape,color,debugMode,worldBoundsMin,worldBoundsMax); + } + glPopMatrix(); + return; + } + + if (shape->getShapeType() == COMPOUND_SHAPE_PROXYTYPE) + { + const btCompoundShape* compoundShape = static_cast(shape); + for (int i=compoundShape->getNumChildShapes()-1;i>=0;i--) + { + btTransform childTrans = compoundShape->getChildTransform(i); + const btCollisionShape* colShape = compoundShape->getChildShape(i); + btScalar childMat[16]; + childTrans.getOpenGLMatrix(childMat); + drawOpenGL(childMat,colShape,color,debugMode,worldBoundsMin,worldBoundsMax); + } + + } else + { + if(m_textureenabled&&(!m_textureinitialized)) + { + GLubyte* image=new GLubyte[256*256*3]; + for(int y=0;y<256;++y) + { + const int t=y>>4; + GLubyte* pi=image+y*256*3; + for(int x=0;x<256;++x) + { + const int s=x>>4; + const GLubyte b=180; + GLubyte c=b+((s+t&1)&1)*(255-b); + pi[0]=pi[1]=pi[2]=c;pi+=3; + } + } + + glGenTextures(1,(GLuint*)&m_texturehandle); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + glTexEnvf(GL_TEXTURE_ENV,GL_TEXTURE_ENV_MODE,GL_MODULATE); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT); + gluBuild2DMipmaps(GL_TEXTURE_2D,3,256,256,GL_RGB,GL_UNSIGNED_BYTE,image); + delete[] image; + + + } + + 
glMatrixMode(GL_TEXTURE); + glLoadIdentity(); + glScalef(0.025f,0.025f,0.025f); + glMatrixMode(GL_MODELVIEW); + + static const GLfloat planex[]={1,0,0,0}; + // static const GLfloat planey[]={0,1,0,0}; + static const GLfloat planez[]={0,0,1,0}; + glTexGenfv(GL_S,GL_OBJECT_PLANE,planex); + glTexGenfv(GL_T,GL_OBJECT_PLANE,planez); + glTexGeni(GL_S,GL_TEXTURE_GEN_MODE,GL_OBJECT_LINEAR); + glTexGeni(GL_T,GL_TEXTURE_GEN_MODE,GL_OBJECT_LINEAR); + glEnable(GL_TEXTURE_GEN_S); + glEnable(GL_TEXTURE_GEN_T); + glEnable(GL_TEXTURE_GEN_R); + m_textureinitialized=true; + + + + + //drawCoordSystem(); + + //glPushMatrix(); + glEnable(GL_COLOR_MATERIAL); + if(m_textureenabled) + { + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + } else + { + glDisable(GL_TEXTURE_2D); + } + + + glColor3f(color.x(),color.y(), color.z()); + + bool useWireframeFallback = true; + + if (!(debugMode & btIDebugDraw::DBG_DrawWireframe)) + { + ///you can comment out any of the specific cases, and use the default + + ///the benefit of 'default' is that it approximates the actual collision shape including collision margin + //int shapetype=m_textureenabled?MAX_BROADPHASE_COLLISION_TYPES:shape->getShapeType(); + int shapetype=shape->getShapeType(); + switch (shapetype) + { + + case SPHERE_SHAPE_PROXYTYPE: + { + const btSphereShape* sphereShape = static_cast(shape); + float radius = sphereShape->getMargin();//radius doesn't include the margin, so draw with margin + drawSphere(radius,10,10); + useWireframeFallback = false; + break; + } + + case BOX_SHAPE_PROXYTYPE: + { + const btBoxShape* boxShape = static_cast(shape); + btVector3 halfExtent = boxShape->getHalfExtentsWithMargin(); + + static int indices[36] = { + 0,1,2, + 3,2,1, + 4,0,6, + 6,0,2, + 5,1,4, + 4,1,0, + 7,3,1, + 7,1,5, + 5,4,7, + 7,4,6, + 7,2,3, + 7,6,2}; + + btVector3 vertices[8]={ + btVector3(halfExtent[0],halfExtent[1],halfExtent[2]), + btVector3(-halfExtent[0],halfExtent[1],halfExtent[2]), + 
btVector3(halfExtent[0],-halfExtent[1],halfExtent[2]), + btVector3(-halfExtent[0],-halfExtent[1],halfExtent[2]), + btVector3(halfExtent[0],halfExtent[1],-halfExtent[2]), + btVector3(-halfExtent[0],halfExtent[1],-halfExtent[2]), + btVector3(halfExtent[0],-halfExtent[1],-halfExtent[2]), + btVector3(-halfExtent[0],-halfExtent[1],-halfExtent[2])}; +#if 1 + glBegin (GL_TRIANGLES); + int si=36; + for (int i=0;i(shape); + int upIndex = coneShape->getConeUpIndex(); + float radius = coneShape->getRadius();//+coneShape->getMargin(); + float height = coneShape->getHeight();//+coneShape->getMargin(); + switch (upIndex) + { + case 0: + glRotatef(90.0, 0.0, 1.0, 0.0); + break; + case 1: + glRotatef(-90.0, 1.0, 0.0, 0.0); + break; + case 2: + break; + default: + { + } + }; + + glTranslatef(0.0, 0.0, -0.5*height); + glutSolidCone(radius,height,10,10); + useWireframeFallback = false; + break; + + } +#endif + + case STATIC_PLANE_PROXYTYPE: + { + const btStaticPlaneShape* staticPlaneShape = static_cast(shape); + btScalar planeConst = staticPlaneShape->getPlaneConstant(); + const btVector3& planeNormal = staticPlaneShape->getPlaneNormal(); + btVector3 planeOrigin = planeNormal * planeConst; + btVector3 vec0,vec1; + btPlaneSpace1(planeNormal,vec0,vec1); + btScalar vecLen = 100.f; + btVector3 pt0 = planeOrigin + vec0*vecLen; + btVector3 pt1 = planeOrigin - vec0*vecLen; + btVector3 pt2 = planeOrigin + vec1*vecLen; + btVector3 pt3 = planeOrigin - vec1*vecLen; + glBegin(GL_LINES); + glVertex3f(pt0.getX(),pt0.getY(),pt0.getZ()); + glVertex3f(pt1.getX(),pt1.getY(),pt1.getZ()); + glVertex3f(pt2.getX(),pt2.getY(),pt2.getZ()); + glVertex3f(pt3.getX(),pt3.getY(),pt3.getZ()); + glEnd(); + + + break; + + } + +/* + case CYLINDER_SHAPE_PROXYTYPE: + { + const btCylinderShape* cylinder = static_cast(shape); + int upAxis = cylinder->getUpAxis(); + + + float radius = cylinder->getRadius(); + float halfHeight = cylinder->getHalfExtentsWithMargin()[upAxis]; + + drawCylinder(radius,halfHeight,upAxis); + + 
break; + } +*/ + + case MULTI_SPHERE_SHAPE_PROXYTYPE: + { + const btMultiSphereShape* multiSphereShape = static_cast(shape); + + btTransform childTransform; + childTransform.setIdentity(); + + + for (int i = multiSphereShape->getSphereCount()-1; i>=0;i--) + { + btSphereShape sc(multiSphereShape->getSphereRadius(i)); + childTransform.setOrigin(multiSphereShape->getSpherePosition(i)); + btScalar childMat[16]; + childTransform.getOpenGLMatrix(childMat); + drawOpenGL(childMat,&sc,color,debugMode,worldBoundsMin,worldBoundsMax); + } + + break; + } + + default: + { + if (shape->isConvex()) + { + const btConvexPolyhedron* poly = shape->isPolyhedral() ? ((btPolyhedralConvexShape*) shape)->getConvexPolyhedron() : 0; + if (poly) + { + int i; + glBegin (GL_TRIANGLES); + for (i=0;im_faces.size();i++) + { + btVector3 centroid(0,0,0); + int numVerts = poly->m_faces[i].m_indices.size(); + if (numVerts>2) + { + btVector3 v1 = poly->m_vertices[poly->m_faces[i].m_indices[0]]; + for (int v=0;vm_faces[i].m_indices.size()-2;v++) + { + + btVector3 v2 = poly->m_vertices[poly->m_faces[i].m_indices[v+1]]; + btVector3 v3 = poly->m_vertices[poly->m_faces[i].m_indices[v+2]]; + btVector3 normal = (v3-v1).cross(v2-v1); + normal.normalize (); + glNormal3f(normal.getX(),normal.getY(),normal.getZ()); + glVertex3f (v1.x(), v1.y(), v1.z()); + glVertex3f (v2.x(), v2.y(), v2.z()); + glVertex3f (v3.x(), v3.y(), v3.z()); + } + } + } + glEnd (); + } else + { + ShapeCache* sc=cache((btConvexShape*)shape); + //glutSolidCube(1.0); + btShapeHull* hull = &sc->m_shapehull/*(btShapeHull*)shape->getUserPointer()*/; + + if (hull && hull->numTriangles () > 0) + { + int index = 0; + const unsigned int* idx = hull->getIndexPointer(); + const btVector3* vtx = hull->getVertexPointer(); + + glBegin (GL_TRIANGLES); + + for (int i = 0; i < hull->numTriangles (); i++) + { + int i1 = index++; + int i2 = index++; + int i3 = index++; + btAssert(i1 < hull->numIndices () && + i2 < hull->numIndices () && + i3 < hull->numIndices 
()); + + int index1 = idx[i1]; + int index2 = idx[i2]; + int index3 = idx[i3]; + btAssert(index1 < hull->numVertices () && + index2 < hull->numVertices () && + index3 < hull->numVertices ()); + + btVector3 v1 = vtx[index1]; + btVector3 v2 = vtx[index2]; + btVector3 v3 = vtx[index3]; + btVector3 normal = (v3-v1).cross(v2-v1); + normal.normalize (); + glNormal3f(normal.getX(),normal.getY(),normal.getZ()); + glVertex3f (v1.x(), v1.y(), v1.z()); + glVertex3f (v2.x(), v2.y(), v2.z()); + glVertex3f (v3.x(), v3.y(), v3.z()); + + } + glEnd (); + + } + } + } + } + } + + } + + + glNormal3f(0,1,0); + + + /// for polyhedral shapes + if (debugMode==btIDebugDraw::DBG_DrawFeaturesText && (shape->isPolyhedral())) + { + btPolyhedralConvexShape* polyshape = (btPolyhedralConvexShape*) shape; + + { + + glColor3f(1.f, 1.f, 1.f); + int i; + for (i=0;igetNumVertices();i++) + { + btVector3 vtx; + polyshape->getVertex(i,vtx); + char buf[12]; + sprintf(buf," %d",i); + //btDrawString(BMF_GetFont(BMF_kHelvetica10),buf); + } + + for (i=0;igetNumPlanes();i++) + { + btVector3 normal; + btVector3 vtx; + polyshape->getPlane(normal,vtx,i); + //btScalar d = vtx.dot(normal); + + //char buf[12]; + //sprintf(buf," plane %d",i); + //btDrawString(BMF_GetFont(BMF_kHelvetica10),buf); + + } + } + + } + + +#ifdef USE_DISPLAY_LISTS + + if (shape->getShapeType() == TRIANGLE_MESH_SHAPE_PROXYTYPE||shape->getShapeType() == GIMPACT_SHAPE_PROXYTYPE) + { + GLuint dlist = OGL_get_displaylist_for_shape((btCollisionShape * )shape); + if (dlist) + { + glCallList(dlist); + } + else + { +#else + if (shape->isConcave() && !shape->isInfinite()) + { + btConcaveShape* concaveMesh = (btConcaveShape*) shape; + + GlDrawcallback drawCallback; + drawCallback.m_wireframe = (debugMode & btIDebugDraw::DBG_DrawWireframe)!=0; + + concaveMesh->processAllTriangles(&drawCallback,worldBoundsMin,worldBoundsMax); + + } +#endif + +#ifdef USE_DISPLAY_LISTS + } +} +#endif + + + + + + } + glPopMatrix(); + +} + +// +void 
GL_ShapeDrawer::drawShadow(btScalar* m,const btVector3& extrusion,const btCollisionShape* shape,const btVector3& worldBoundsMin,const btVector3& worldBoundsMax) +{ + glPushMatrix(); + btglMultMatrix(m); + if(shape->getShapeType() == UNIFORM_SCALING_SHAPE_PROXYTYPE) + { + const btUniformScalingShape* scalingShape = static_cast(shape); + const btConvexShape* convexShape = scalingShape->getChildShape(); + float scalingFactor = (float)scalingShape->getUniformScalingFactor(); + btScalar tmpScaling[4][4]={ {scalingFactor,0,0,0}, + {0,scalingFactor,0,0}, + {0,0,scalingFactor,0}, + {0,0,0,1}}; + drawShadow((btScalar*)tmpScaling,extrusion,convexShape,worldBoundsMin,worldBoundsMax); + glPopMatrix(); + return; + } + else if(shape->getShapeType()==COMPOUND_SHAPE_PROXYTYPE) + { + const btCompoundShape* compoundShape = static_cast(shape); + for (int i=compoundShape->getNumChildShapes()-1;i>=0;i--) + { + btTransform childTrans = compoundShape->getChildTransform(i); + const btCollisionShape* colShape = compoundShape->getChildShape(i); + btScalar childMat[16]; + childTrans.getOpenGLMatrix(childMat); + drawShadow(childMat,extrusion*childTrans.getBasis(),colShape,worldBoundsMin,worldBoundsMax); + } + } + else + { + // bool useWireframeFallback = true; + if (shape->isConvex()) + { + + const btConvexPolyhedron* poly = shape->isPolyhedral() ? ((btPolyhedralConvexShape*) shape)->getConvexPolyhedron() : 0; + ShapeCache* sc=cache((btConvexShape*)shape); + btShapeHull* hull =&sc->m_shapehull; + const btVector3* vertexPointer = 0; + + vertexPointer = (poly && poly->m_vertices.size())? &poly->m_vertices[0] : 0; + if (!vertexPointer) + vertexPointer = hull->numVertices() ? 
hull->getVertexPointer():0; + + if (vertexPointer) + { + glBegin(GL_QUADS); + for(int i=0;im_edges.size();++i) + { + const btScalar d=btDot(sc->m_edges[i].n[0],extrusion); + if((d*btDot(sc->m_edges[i].n[1],extrusion))<0) + { + const int q= d<0?1:0; + const btVector3& a= vertexPointer[sc->m_edges[i].v[q]]; + const btVector3& b= vertexPointer[sc->m_edges[i].v[1-q]]; + glVertex3f(a[0],a[1],a[2]); + glVertex3f(b[0],b[1],b[2]); + glVertex3f(b[0]+extrusion[0],b[1]+extrusion[1],b[2]+extrusion[2]); + glVertex3f(a[0]+extrusion[0],a[1]+extrusion[1],a[2]+extrusion[2]); + } + } + glEnd(); + } + } + } + + + if (shape->isConcave())//>getShapeType() == TRIANGLE_MESH_SHAPE_PROXYTYPE||shape->getShapeType() == GIMPACT_SHAPE_PROXYTYPE) + // if (shape->getShapeType() == TRIANGLE_MESH_SHAPE_PROXYTYPE) + { + btConcaveShape* concaveMesh = (btConcaveShape*) shape; + + GlDrawcallback drawCallback; + drawCallback.m_wireframe = false; + + concaveMesh->processAllTriangles(&drawCallback,worldBoundsMin,worldBoundsMax); + + } + glPopMatrix(); + +} + +// +GL_ShapeDrawer::GL_ShapeDrawer() +{ + m_texturehandle = 0; + m_textureenabled = false; + m_textureinitialized = false; +} + +GL_ShapeDrawer::~GL_ShapeDrawer() +{ + int i; + for (i=0;i~ShapeCache(); + btAlignedFree(m_shapecaches[i]); + } + m_shapecaches.clear(); + if(m_textureinitialized) + { + glDeleteTextures(1,(const GLuint*) &m_texturehandle); + } +} + + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_ShapeDrawer.h b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_ShapeDrawer.h new file mode 100644 index 000000000..65bf29de4 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_ShapeDrawer.h @@ -0,0 +1,70 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. 
+In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +#ifndef GL_SHAPE_DRAWER_H +#define GL_SHAPE_DRAWER_H + +class btCollisionShape; +class btShapeHull; +#include "LinearMath/btAlignedObjectArray.h" +#include "LinearMath/btVector3.h" + +#include "BulletCollision/CollisionShapes/btShapeHull.h" + +/// OpenGL shape drawing +class GL_ShapeDrawer +{ +protected: + struct ShapeCache + { + struct Edge { btVector3 n[2];int v[2]; }; //n: the two normals drawShadow dots with the extrusion vector; v: indices of the edge's two vertices + ShapeCache(btConvexShape* s) : m_shapehull(s) {} + btShapeHull m_shapehull; + btAlignedObjectArray m_edges; + }; + //dynamically created shape hulls/caches; the destructor destroys and frees every entry + btAlignedObjectArray m_shapecaches; + unsigned int m_texturehandle; + bool m_textureenabled; + bool m_textureinitialized; + + + ShapeCache* cache(btConvexShape*); + +public: + GL_ShapeDrawer(); + + virtual ~GL_ShapeDrawer(); + + ///drawOpenGL might allocate temporary memory; the convex-hull path takes its hull from cache(), the shape user-pointer lookup is commented out + virtual void drawOpenGL(btScalar* m, const btCollisionShape* shape, const btVector3& color,int debugMode,const btVector3& worldBoundsMin,const btVector3& worldBoundsMax); + virtual void drawShadow(btScalar* m, const btVector3& extrusion,const btCollisionShape* shape,const btVector3& worldBoundsMin,const btVector3& worldBoundsMax); + + bool enableTexture(bool enable) { bool
p=m_textureenabled;m_textureenabled=enable;return(p); } + bool hasTextureEnabled() const + { + return m_textureenabled; + } + + static void drawCylinder(float radius,float halfHeight, int upAxis); + void drawSphere(btScalar r, int lats, int longs); + static void drawCoordSystem(); + +}; + +void OGL_displaylist_register_shape(btCollisionShape * shape); +void OGL_displaylist_clean(); + +#endif //GL_SHAPE_DRAWER_H + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_Simplex1to4.cpp b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_Simplex1to4.cpp new file mode 100644 index 000000000..b364a1b98 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_Simplex1to4.cpp @@ -0,0 +1,76 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +#include "GL_Simplex1to4.h" +#include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h" +#include "GL_ShapeDrawer.h" +#ifdef _WIN32 +#include +#endif + +//think different +#if defined(__APPLE__) && !defined (VMDMESA) +#include +#include +#else +#include +#endif +#include "GlutStuff.h" +#include "LinearMath/btTransform.h" + +GL_Simplex1to4::GL_Simplex1to4() +:m_simplexSolver(0) +{ +} + +/// +/// Debugging method calcClosest calculates the closest point to the origin, using m_simplexSolver +/// +void GL_Simplex1to4::calcClosest(btScalar* m) +{ + btTransform tr; + tr.setFromOpenGLMatrix(m); + + + + GL_ShapeDrawer::drawCoordSystem(); + + if (m_simplexSolver) + { + m_simplexSolver->reset(); + bool res; + + btVector3 v; + + for (int i=0;iaddVertex(v,v,btVector3(0.f,0.f,0.f)); + res = m_simplexSolver->closest(v); + } + + //draw v? + glDisable(GL_LIGHTING); + glBegin(GL_LINES); + btglColor3(1.f, 0.f, 0.f); + btglVertex3(0.f, 0.f, 0.f); + btglVertex3(v.x(),v.y(),v.z()); + glEnd(); + + glEnable(GL_LIGHTING); + + + } + +} diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_Simplex1to4.h b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_Simplex1to4.h new file mode 100644 index 000000000..c75e3573b --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GL_Simplex1to4.h @@ -0,0 +1,40 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +#ifndef GL_SIMPLEX_1TO4_H +#define GL_SIMPLEX_1TO4_H + +#include "BulletCollision/CollisionShapes/btTetrahedronShape.h" + +#include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h" + +///GL_Simplex1to4 is a class to debug a Simplex Solver with 1 to 4 points. +///Can be used by GJK. +class GL_Simplex1to4 : public btBU_Simplex1to4 +{ + btSimplexSolverInterface* m_simplexSolver; + + public: + + GL_Simplex1to4(); + + void calcClosest(btScalar* m); + + void setSimplexSolver(btSimplexSolverInterface* simplexSolver) { + m_simplexSolver = simplexSolver; + } + +}; + +#endif //GL_SIMPLEX_1TO4_H diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutDemoApplication.cpp b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutDemoApplication.cpp new file mode 100644 index 000000000..0ceaede76 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutDemoApplication.cpp @@ -0,0 +1,87 @@ + +#ifndef _WINDOWS + +#include "GlutDemoApplication.h" + +#include "GlutStuff.h" + +#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h" +#include "BulletDynamics/Dynamics/btRigidBody.h" + +void GlutDemoApplication::updateModifierKeys() +{ + m_modifierKeys = 0; + if (glutGetModifiers() & GLUT_ACTIVE_ALT) + m_modifierKeys |= BT_ACTIVE_ALT; + + if (glutGetModifiers() & GLUT_ACTIVE_CTRL) + m_modifierKeys |= BT_ACTIVE_CTRL; + + if (glutGetModifiers() & GLUT_ACTIVE_SHIFT) + m_modifierKeys |= BT_ACTIVE_SHIFT; +} + +void GlutDemoApplication::specialKeyboard(int key, int x, int y) +{ + (void)x; + (void)y; + + switch (key) + { + case GLUT_KEY_F1: + { + + break; + } + + case GLUT_KEY_F2: + { + + break; + } + + + case GLUT_KEY_END: + { + int 
numObj = getDynamicsWorld()->getNumCollisionObjects(); + if (numObj) + { + btCollisionObject* obj = getDynamicsWorld()->getCollisionObjectArray()[numObj-1]; + + getDynamicsWorld()->removeCollisionObject(obj); + btRigidBody* body = btRigidBody::upcast(obj); + if (body && body->getMotionState()) + { + delete body->getMotionState(); + } + delete obj; + + + } + break; + } + case GLUT_KEY_LEFT : stepLeft(); break; + case GLUT_KEY_RIGHT : stepRight(); break; + case GLUT_KEY_UP : stepFront(); break; + case GLUT_KEY_DOWN : stepBack(); break; + case GLUT_KEY_PAGE_UP : zoomIn(); break; + case GLUT_KEY_PAGE_DOWN : zoomOut(); break; + case GLUT_KEY_HOME : toggleIdle(); break; + default: + // std::cout << "unused (special) key : " << key << std::endl; + break; + } + + glutPostRedisplay(); + +} + +void GlutDemoApplication::swapBuffers() +{ + glutSwapBuffers(); + +} + +#endif //_WINDOWS + + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutDemoApplication.h b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutDemoApplication.h new file mode 100644 index 000000000..e2727a777 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutDemoApplication.h @@ -0,0 +1,34 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#ifndef GLUT_DEMO_APPLICATION_H +#define GLUT_DEMO_APPLICATION_H + +#include "DemoApplication.h" + +class GlutDemoApplication : public DemoApplication +{ +public: + + void specialKeyboard(int key, int x, int y); + + virtual void swapBuffers(); + + virtual void updateModifierKeys(); + +}; +#endif //GLUT_DEMO_APPLICATION_H + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutStuff.cpp b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutStuff.cpp new file mode 100644 index 000000000..92872a130 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutStuff.cpp @@ -0,0 +1,119 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#ifndef _WINDOWS + +#include "DemoApplication.h" + +//glut is C code, this global gDemoApplication links glut to the C++ demo +static DemoApplication* gDemoApplication = 0; + + +#include "GlutStuff.h" + +static void glutKeyboardCallback(unsigned char key, int x, int y) +{ + gDemoApplication->keyboardCallback(key,x,y); +} + +static void glutKeyboardUpCallback(unsigned char key, int x, int y) +{ + gDemoApplication->keyboardUpCallback(key,x,y); +} + +static void glutSpecialKeyboardCallback(int key, int x, int y) +{ + gDemoApplication->specialKeyboard(key,x,y); +} + +static void glutSpecialKeyboardUpCallback(int key, int x, int y) +{ + gDemoApplication->specialKeyboardUp(key,x,y); +} + + +static void glutReshapeCallback(int w, int h) +{ + gDemoApplication->reshape(w,h); +} + +static void glutMoveAndDisplayCallback() +{ + gDemoApplication->moveAndDisplay(); +} + +static void glutMouseFuncCallback(int button, int state, int x, int y) +{ + gDemoApplication->mouseFunc(button,state,x,y); +} + + +static void glutMotionFuncCallback(int x,int y) +{ + gDemoApplication->mouseMotionFunc(x,y); +} + + +static void glutDisplayCallback(void) +{ + gDemoApplication->displayCallback(); +} + + +int glutmain(int argc, char **argv,int width,int height,const char* title,DemoApplication* demoApp) { + + gDemoApplication = demoApp; + + glutInit(&argc, argv); + glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA | GLUT_DEPTH | GLUT_STENCIL); + glutInitWindowPosition(0, 0); + glutInitWindowSize(width, height); + glutCreateWindow(title); +#ifdef BT_USE_FREEGLUT + glutSetOption (GLUT_ACTION_ON_WINDOW_CLOSE, GLUT_ACTION_GLUTMAINLOOP_RETURNS); +#endif + + gDemoApplication->myinit(); + + glutKeyboardFunc(glutKeyboardCallback); + glutKeyboardUpFunc(glutKeyboardUpCallback); + glutSpecialFunc(glutSpecialKeyboardCallback); + glutSpecialUpFunc(glutSpecialKeyboardUpCallback); + + glutReshapeFunc(glutReshapeCallback); + //createMenu(); + glutIdleFunc(glutMoveAndDisplayCallback); + 
glutMouseFunc(glutMouseFuncCallback); + glutPassiveMotionFunc(glutMotionFuncCallback); + glutMotionFunc(glutMotionFuncCallback); + glutDisplayFunc( glutDisplayCallback ); + + glutMoveAndDisplayCallback(); + +//enable vsync to avoid tearing on Apple (todo: for Windows) + +#if defined(__APPLE__) && !defined (VMDMESA) +int swap_interval = 1; +CGLContextObj cgl_context = CGLGetCurrentContext(); +CGLSetParameter(cgl_context, kCGLCPSwapInterval, &swap_interval); +#endif + + + + return 0; +} + + +#endif //_WINDOWS diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutStuff.h b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutStuff.h new file mode 100644 index 000000000..5891e769d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/GlutStuff.h @@ -0,0 +1,86 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2012 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +#ifndef GLUT_STUFF_H +#define GLUT_STUFF_H //GLUT/OpenGL includes, BT_KEY_*/BT_ACTIVE_* aliases and precision-dependent btgl* wrappers + +#ifdef _WIN32//for glut.h +#include +#endif + +//think different +#if defined(__APPLE__) && !defined (VMDMESA) +#include +#include +#include +#include +#else + + +#ifdef _WINDOWS +#include +#include +#include +#else +#include +#include +#endif //_WINDOWS +#endif //APPLE + +#ifdef _WINDOWS +#define BT_ACTIVE_ALT VK_LMENU + +#else +#define BT_KEY_K 'k' +#define BT_KEY_LEFT GLUT_KEY_LEFT +#define BT_KEY_RIGHT GLUT_KEY_RIGHT +#define BT_KEY_UP GLUT_KEY_UP +#define BT_KEY_DOWN GLUT_KEY_DOWN +#define BT_KEY_F1 GLUT_KEY_F1 +#define BT_KEY_F2 GLUT_KEY_F2 +#define BT_KEY_F3 GLUT_KEY_F3 +#define BT_KEY_F4 GLUT_KEY_F4 +#define BT_KEY_F5 GLUT_KEY_F5 +#define BT_KEY_PAGEUP GLUT_KEY_PAGE_UP +#define BT_KEY_PAGEDOWN GLUT_KEY_PAGE_DOWN +#define BT_KEY_END GLUT_KEY_END +#define BT_KEY_HOME GLUT_KEY_HOME +#define BT_ACTIVE_ALT GLUT_ACTIVE_ALT +#define BT_ACTIVE_CTRL GLUT_ACTIVE_CTRL //must be a different bit than BT_ACTIVE_ALT so updateModifierKeys() can report Ctrl on its own +#define BT_ACTIVE_SHIFT GLUT_ACTIVE_SHIFT +#endif + +#ifdef BT_USE_FREEGLUT +#include "GL/freeglut_ext.h" //to be able to return from glutMainLoop() +#endif + + + +class DemoApplication; + +int glutmain(int argc, char **argv,int width,int height,const char* title,DemoApplication* demoApp); + +#if defined(BT_USE_DOUBLE_PRECISION) +#define btglLoadMatrix glLoadMatrixd +#define btglMultMatrix glMultMatrixd +#define btglColor3 glColor3d +#define btglVertex3 glVertex3d +#else +#define btglLoadMatrix glLoadMatrixf +#define btglMultMatrix glMultMatrixf +#define btglColor3 glColor3f +#define btglVertex3 glVertex3f //single-precision build: float entry point, like the other btgl* macros in this branch +#endif + +#endif //GLUT_STUFF_H diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/RenderTexture.cpp b/Extras/RigidBodyGpuPipeline/dynamics/testbed/RenderTexture.cpp new file mode 100644 index 000000000..2c8b88b82 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/RenderTexture.cpp @@ -0,0 +1,86 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans
http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "RenderTexture.h" +#include + + +renderTexture::renderTexture(int width,int height) +:m_height(height),m_width(width) +{ + m_buffer = new unsigned char[m_width*m_height*4]; + + //clear screen + memset(m_buffer,0,m_width*m_height*4); + + //clear screen version 2 + for (int x=0;x>=1; + y++; + } + x++; + } + //xx+=16; + xx+=10; + } +} + +renderTexture::~renderTexture() +{ + delete [] m_buffer; +} + + + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/RenderTexture.h b/Extras/RigidBodyGpuPipeline/dynamics/testbed/RenderTexture.h new file mode 100644 index 000000000..1aee51d79 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/RenderTexture.h @@ -0,0 +1,73 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef RENDER_TEXTURE_H +#define RENDER_TEXTURE_H + +#include "LinearMath/btVector3.h" +#include "GLDebugFont.h" + +/// +///renderTexture provides a software-render context (setpixel/printf) +/// +class renderTexture +{ + int m_height; + int m_width; + unsigned char* m_buffer; + +public: + + renderTexture(int width,int height); + ~renderTexture(); + + ///rgba input is in range [0..1] for each component + inline void setPixel(int x,int y,const btVector4& rgba) + { + unsigned char* pixel = &m_buffer[ (x+y*m_width) * 4]; + + pixel[0] = (unsigned char)(255.*rgba.getX()); + pixel[1] = (unsigned char)(255.*rgba.getY()); + pixel[2] = (unsigned char)(255.*rgba.getZ()); + pixel[3] = (unsigned char)(255.*rgba.getW()); + } + + inline void addPixel(int x,int y,const btVector4& rgba) + { + unsigned char* pixel = &m_buffer[ (x+y*m_width) * 4]; + pixel[0] = (unsigned char)btMin(btScalar(255.f),((btScalar)pixel[0] + btScalar(255.f)*rgba.getX())); + pixel[1] = (unsigned char)btMin(btScalar(255.f),((btScalar)pixel[1] + btScalar(255.f)*rgba.getY())); + pixel[2] = (unsigned char)btMin(btScalar(255.f),((btScalar)pixel[2] + btScalar(255.f)*rgba.getZ())); +// pixel[3] = (unsigned char)btMin(btScalar(255.f),((btScalar)pixel[3] + btScalar(255.f)*rgba.getW())); + } + + inline btVector4 getPixel(int x,int y) + { + unsigned char* pixel = &m_buffer[ 
(x+y*m_width) * 4]; + return btVector4(pixel[0]*1.f/255.f, + pixel[1]*1.f/255.f, + pixel[2]*1.f/255.f, + pixel[3]*1.f/255.f); + } + + const unsigned char* getBuffer() const { return m_buffer;} + int getWidth() const { return m_width;} + int getHeight() const { return m_height;} + void grapicalPrintf(char* str, void* fontData, int startx = 0,int starty=0); + +}; + +#endif //RENDER_TEXTURE_H + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/Win32AppMain.cpp b/Extras/RigidBodyGpuPipeline/dynamics/testbed/Win32AppMain.cpp new file mode 100644 index 000000000..84f48a316 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/Win32AppMain.cpp @@ -0,0 +1,405 @@ +#ifdef _WINDOWS +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2010 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#include +#include + + +#include "DemoApplication.h" + +#include "GLDebugDrawer.h" +#include "GLDebugFont.h" + +#include "BulletDynamics/Dynamics/btDynamicsWorld.h" + +/// This Win32AppMain is shared code between all demos. 
+/// The actual demo, derived from DemoApplication is created using 'createDemo', in a separate .cpp file +DemoApplication* gDemoApplication = 0; +DemoApplication* createDemo(); + + +// Function Declarations + +LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); +void EnableOpenGL(HWND hWnd, HDC * hDC, HGLRC * hRC); +void DisableOpenGL(HWND hWnd, HDC hDC, HGLRC hRC); +static bool sOpenGLInitialized = false; +static int sWidth = 0; +static int sHeight =0; +static int quitRequest = 0; + +// WinMain + +int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, + LPSTR lpCmdLine, int iCmdShow) +{ + WNDCLASS wc; + HWND hWnd; + HDC hDC; + HGLRC hRC; + MSG msg; + BOOL quit = FALSE; + float theta = 0.0f; + + gDemoApplication = createDemo(); + + + // register window class + wc.style = CS_OWNDC; + wc.lpfnWndProc = WndProc; + wc.cbClsExtra = 0; + wc.cbWndExtra = 0; + wc.hInstance = hInstance; + wc.hIcon = LoadIcon( NULL, IDI_APPLICATION ); + wc.hCursor = LoadCursor( NULL, IDC_ARROW ); + wc.hbrBackground = (HBRUSH)GetStockObject( BLACK_BRUSH ); + wc.lpszMenuName = NULL; + wc.lpszClassName = "BulletPhysics"; + RegisterClass( &wc ); + + // create main window + hWnd = CreateWindow( + "BulletPhysics", "Bullet Physics Sample. 
http://bulletphysics.org", + WS_CAPTION | WS_VISIBLE | WS_OVERLAPPEDWINDOW, +// 0, 0, 640, 480, + 0, 0, 1024, 768, + NULL, NULL, hInstance, NULL ); + + // enable OpenGL for the window + EnableOpenGL( hWnd, &hDC, &hRC ); + + + GLDebugDrawer debugDraw; + gDemoApplication->myinit(); + //gDemoApplication->reshape(1024, 768); + gDemoApplication->initPhysics(); + if (gDemoApplication->getDynamicsWorld()) + gDemoApplication->getDynamicsWorld()->setDebugDrawer(&debugDraw); + + gDemoApplication->reshape(sWidth,sHeight); + + // program main loop + while ( !quit ) + { + + // check for messages + if ( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) + { + + // handle or dispatch messages + if ( msg.message == WM_QUIT ) + { + quit = TRUE; + } + else + { + TranslateMessage( &msg ); + DispatchMessage( &msg ); + } + +// gDemoApplication->displayCallback(); + + + }; + + // OpenGL animation code goes here + + glClearColor( .7f, 0.7f, 0.7f, 1.f ); + + gDemoApplication->moveAndDisplay(); + + + SwapBuffers( hDC ); + + theta += 1.0f; + + + } + + + + // shutdown OpenGL + DisableOpenGL( hWnd, hDC, hRC ); + + // destroy the window explicitly + DestroyWindow( hWnd ); + + delete gDemoApplication; + + return msg.wParam; + +} + +// Window Procedure + +LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) +{ + + + + switch (message) + { + + case WM_SYSKEYDOWN: + { + if (lParam & 1<<29) + { + gDemoApplication->m_modifierKeys = VK_LMENU; + } + break; + } + case WM_SYSKEYUP: + { + if (lParam & 1<<29) + { + gDemoApplication->m_modifierKeys = VK_LMENU; + } else + { + gDemoApplication->m_modifierKeys = 0; + } + + break; + } + + + case WM_SIZE: // Size Action Has Taken Place + + switch (wParam) // Evaluate Size Action + { + case SIZE_MINIMIZED: // Was Window Minimized? + return 0; // Return + + case SIZE_MAXIMIZED: // Was Window Maximized? 
+ sWidth = LOWORD (lParam); + sHeight = HIWORD (lParam); + if (sOpenGLInitialized) + { + gDemoApplication->reshape(sWidth,sHeight); + } + return 0; // Return + + case SIZE_RESTORED: // Was Window Restored? + sWidth = LOWORD (lParam); + sHeight = HIWORD (lParam); + if (sOpenGLInitialized) + { + gDemoApplication->reshape(sWidth,sHeight); + } + return 0; // Return + } + break; + + case WM_CREATE: + return 0; + + case WM_MBUTTONUP: + { + int xPos = LOWORD(lParam); + int yPos = HIWORD(lParam); + gDemoApplication->mouseFunc(1,1,xPos,yPos); + break; + } + case WM_MBUTTONDOWN: + { + int xPos = LOWORD(lParam); + int yPos = HIWORD(lParam); + gDemoApplication->mouseFunc(1,0,xPos,yPos); + break; + } + + case WM_LBUTTONUP: + { + int xPos = LOWORD(lParam); + int yPos = HIWORD(lParam); + gDemoApplication->mouseFunc(0,1,xPos,yPos); + break; + } + case 0x020A://WM_MOUSEWHEEL: + { + + int zDelta = (short)HIWORD(wParam); + int xPos = LOWORD(lParam); + int yPos = HIWORD(lParam); + if (zDelta>0) + gDemoApplication->zoomIn(); + else + gDemoApplication->zoomOut(); + break; + } + + case WM_MOUSEMOVE: + { + int xPos = LOWORD(lParam); + int yPos = HIWORD(lParam); + gDemoApplication->mouseMotionFunc(xPos,yPos); + break; + } + case WM_RBUTTONUP: + { + int xPos = LOWORD(lParam); + int yPos = HIWORD(lParam); + gDemoApplication->mouseFunc(2,1,xPos,yPos); + break; + } + case WM_RBUTTONDOWN: + { + int xPos = LOWORD(lParam); + int yPos = HIWORD(lParam); + gDemoApplication->mouseFunc(2,0,xPos,yPos); + break; + } + case WM_LBUTTONDOWN: + { + int xPos = LOWORD(lParam); + int yPos = HIWORD(lParam); + gDemoApplication->mouseFunc(0,0,xPos,yPos); + break; + } +/*#define WM_LBUTTONUP 0x0202 +#define WM_LBUTTONDBLCLK 0x0203 +#define WM_RBUTTONDOWN 0x0204 +#define WM_RBUTTONUP 0x0205 +#define WM_RBUTTONDBLCLK 0x0206 +#define WM_MBUTTONDOWN 0x0207 +#define WM_MBUTTONUP 0x0208 +#define WM_MBUTTONDBLCLK 0x0209 +*/ + + + + case WM_CLOSE: + PostQuitMessage( 0 ); + return 0; + + case WM_DESTROY: + return 0; + + 
case WM_KEYUP: + switch ( wParam ) + { + + case VK_PRIOR: + case VK_NEXT: + case VK_END: + case VK_HOME: + case VK_LEFT: + case VK_UP: + case VK_RIGHT: + case VK_DOWN: + { + if (gDemoApplication) + gDemoApplication->specialKeyboardUp(wParam,0,0); + return 0; + } + default: + { + gDemoApplication->keyboardUpCallback(tolower(wParam),0,0); + } + return DefWindowProc( hWnd, message, wParam, lParam ); + } + + case WM_KEYDOWN: + //navigation keys, space (scene reset) and Q/Escape (quit) are handled here; characters are delivered through WM_CHAR below + switch ( wParam ) + { + case VK_CONTROL: + case VK_PRIOR: + case VK_NEXT: + case VK_END: + case VK_HOME: + case VK_LEFT: + case VK_UP: + case VK_RIGHT: + case VK_DOWN: + { + if (gDemoApplication) + gDemoApplication->specialKeyboard(wParam,0,0); + break; + } + + case ' ': + { + if (gDemoApplication) + gDemoApplication->clientResetScene(); + break; + } + case 'Q': + case VK_ESCAPE: + { + quitRequest = 1; + PostQuitMessage(0); + } + return 0; + + } + return 0; + + case WM_CHAR: + if (!quitRequest) + gDemoApplication->keyboardCallback(wParam,0,0); + break; + + default: + return DefWindowProc( hWnd, message, wParam, lParam ); + + } + return 0; +} + +// Enable OpenGL + +void EnableOpenGL(HWND hWnd, HDC * hDC, HGLRC * hRC) +{ + PIXELFORMATDESCRIPTOR pfd; + int format; + + // get the device context (DC) + *hDC = GetDC( hWnd ); + + // set the pixel format for the DC + ZeroMemory( &pfd, sizeof( pfd ) ); + pfd.nSize = sizeof( pfd ); + pfd.nVersion = 1; + pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; + pfd.iPixelType = PFD_TYPE_RGBA; + pfd.cColorBits = 24; + pfd.cDepthBits = 16; + pfd.cStencilBits = 1; + pfd.iLayerType = PFD_MAIN_PLANE; + format = ChoosePixelFormat( *hDC, &pfd ); + SetPixelFormat( *hDC, format, &pfd ); + + // create and enable the render context (RC) + *hRC = wglCreateContext( *hDC ); + wglMakeCurrent( *hDC, *hRC ); + sOpenGLInitialized = true; + + +} + +// Disable OpenGL + +void DisableOpenGL(HWND hWnd, HDC hDC, HGLRC hRC) +{ + sOpenGLInitialized = false; + + wglMakeCurrent( NULL, NULL ); +
wglDeleteContext( hRC ); + ReleaseDC( hWnd, hDC ); +} + +#endif //_WINDOWS diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/Win32DemoApplication.cpp b/Extras/RigidBodyGpuPipeline/dynamics/testbed/Win32DemoApplication.cpp new file mode 100644 index 000000000..f959cbf4f --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/Win32DemoApplication.cpp @@ -0,0 +1,79 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#ifdef _WINDOWS + +#include "Win32DemoApplication.h" + + + + +#if 0 +void Win32DemoApplication::renderme() +{ +} +void Win32DemoApplication::setTexturing(bool useTexture) +{ +} + +void Win32DemoApplication::setShadows(bool useShadows) +{ +} + +void Win32DemoApplication::setCameraDistance(float camDist) +{ +} +void Win32DemoApplication::clientResetScene() +{ + +} +#endif + +void Win32DemoApplication::updateModifierKeys() +{ + //not yet +} + + + +void Win32DemoApplication::specialKeyboard(int key, int x, int y) +{ + (void)x; + (void)y; + + switch (key) + { + case VK_LEFT : stepLeft(); break; + case VK_RIGHT : stepRight(); break; + case VK_UP : stepFront(); break; + case VK_DOWN : stepBack(); break; + +// case GLUT_KEY_PAGE_UP : zoomIn(); break; +// case GLUT_KEY_PAGE_DOWN : zoomOut(); break; +// case GLUT_KEY_HOME : toggleIdle(); break; + + default: + // std::cout << "unused (special) key : " << key << std::endl; + break; + } + +} + +void Win32DemoApplication::swapBuffers() +{ +} + +#endif + diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/Win32DemoApplication.h b/Extras/RigidBodyGpuPipeline/dynamics/testbed/Win32DemoApplication.h new file mode 100644 index 000000000..0c2a1ee49 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/Win32DemoApplication.h @@ -0,0 +1,40 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#ifndef WIN32_DEMO_APPLICATION_H +#define WIN32_DEMO_APPLICATION_H + + +#include "DemoApplication.h" + +class Win32DemoApplication : public DemoApplication +{ +protected: + + +public: + + + virtual void swapBuffers(); + + void specialKeyboard(int key, int x, int y); + + virtual void updateModifierKeys(); + + +}; + +#endif //WIN32_DEMO_APPLICATION_H \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/dynamics/testbed/premake4.lua b/Extras/RigidBodyGpuPipeline/dynamics/testbed/premake4.lua new file mode 100644 index 000000000..576aa0fb7 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/dynamics/testbed/premake4.lua @@ -0,0 +1,18 @@ + project "testbed" + + kind "StaticLib" + targetdir "../../build/lib" + includedirs { + ".", + "../../bullet2" + } + configuration {"Windows"} + includedirs { + "../../rendering/GlutGlewWindows" + } + configuration{} + + files { + "**.cpp", + "**.h" + } diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/AMD/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/AMD/premake4.lua new file mode 100644 index 000000000..2ae27f6f4 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/AMD/premake4.lua @@ -0,0 +1,29 @@ + + hasCL = findOpenCL_AMD() + + if (hasCL) then + + project "OpenCL_bt3dGridBroadphase_AMD" + + initOpenCL_AMD() + + language "C++" + + kind "StaticLib" + targetdir "../../../bin" + + libdirs {"../../../rendering/GlutGlewWindows"} + + includedirs { +-- "../../../rendering/GlutGlewWindows", + "../../../opencl/3dGridBroadphase/Shared", + "../../../../../src", + "../../primitives" + } + + files { + "../Shared/*.cpp", + "../Shared/*.h" + } + + end 
\ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/MiniCL/MiniCLTaskWrap.cpp b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/MiniCL/MiniCLTaskWrap.cpp new file mode 100644 index 000000000..1398190f2 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/MiniCL/MiniCLTaskWrap.cpp @@ -0,0 +1,23 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#include + +extern "C" +{ + #define MSTRINGIFY(A) A + #include "bt3dGridBroadphaseOCL.cl" + #undef MSTRINGIFY +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cl b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cl new file mode 100644 index 000000000..f66b6e28b --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cl @@ -0,0 +1,349 @@ + +MSTRINGIFY( + +int getPosHash(int4 gridPos, __global float4* pParams) +{ + int4 gridDim = *((__global int4*)(pParams + 1)); + gridPos.x &= gridDim.x - 1; + gridPos.y &= gridDim.y - 1; + gridPos.z &= gridDim.z - 1; + int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x; + return hash; +} + +int4 getGridPos(float4 worldPos, __global float4* pParams) +{ + int4 gridPos; + int4 gridDim = *((__global int4*)(pParams + 1)); + gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1); + gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1); + gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1); + return gridPos; +} + + +// calculate grid hash value for each body using its AABB +__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams GUID_ARG) +{ + int index = get_global_id(0); + if(index >= numObjects) + { + return; + } + float4 bbMin = pAABB[index*2]; + float4 bbMax = pAABB[index*2 + 1]; + float4 pos; + pos.x = (bbMin.x + bbMax.x) * 0.5f; + pos.y = (bbMin.y + bbMax.y) * 0.5f; + pos.z = (bbMin.z + bbMax.z) * 0.5f; + pos.w = 0.f; + // get address in grid + int4 gridPos = getGridPos(pos, pParams); + int gridHash = getPosHash(gridPos, pParams); + // store grid hash and body index + int2 hashVal; + hashVal.x = gridHash; + hashVal.y = index; + pHash[index] = hashVal; +} + +__kernel void kClearCellStart( int numCells, + __global int* pCellStart GUID_ARG) +{ + int index = get_global_id(0); 
+ if(index >= numCells) + { + return; + } + pCellStart[index] = -1; +} + +__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart GUID_ARG) +{ + __local int sharedHash[513]; + int index = get_global_id(0); + int2 sortedData; + if(index < numObjects) + { + sortedData = pHash[index]; + // Load hash data into shared memory so that we can look + // at neighboring body's hash value without loading + // two hash values per thread + sharedHash[get_local_id(0) + 1] = sortedData.x; + if((index > 0) && (get_local_id(0) == 0)) + { + // first thread in block must load neighbor body hash + sharedHash[0] = pHash[index-1].x; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if(index < numObjects) + { + if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)])) + { + cellStart[sortedData.x] = index; + } + } +} + +int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1) +{ + return (min0.x <= max1.x)&& (min1.x <= max0.x) && + (min0.y <= max1.y)&& (min1.y <= max0.y) && + (min0.z <= max1.z)&& (min1.z <= max0.z); +} + + + + + +void findPairsInCell( int numObjects, + int4 gridPos, + int index, + __global int2* pHash, + __global int* pCellStart, + __global float4* pAABB, + __global int* pPairBuff, + __global int2* pPairBuffStartCurr, + __global float4* pParams) +{ + int4 pGridDim = *((__global int4*)(pParams + 1)); + int maxBodiesPerCell = pGridDim.w; + int gridHash = getPosHash(gridPos, pParams); + // get start of bucket for this cell + int bucketStart = pCellStart[gridHash]; + if (bucketStart == -1) + { + return; // cell empty + } + // iterate over bodies in this cell + int2 sortedData = pHash[index]; + int unsorted_indx = sortedData.y; + float4 min0 = pAABB[unsorted_indx*2 + 0]; + float4 max0 = pAABB[unsorted_indx*2 + 1]; + int handleIndex = as_int(min0.w); + int2 start_curr = pPairBuffStartCurr[handleIndex]; + int start = start_curr.x; + int curr = start_curr.y; + int2 start_curr_next = pPairBuffStartCurr[handleIndex+1]; + int 
curr_max = start_curr_next.x - start - 1; + int bucketEnd = bucketStart + maxBodiesPerCell; + bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd; + for(int index2 = bucketStart; index2 < bucketEnd; index2++) + { + int2 cellData = pHash[index2]; + if (cellData.x != gridHash) + { + break; // no longer in same bucket + } + int unsorted_indx2 = cellData.y; + if (unsorted_indx2 < unsorted_indx) // check not colliding with self + { + float4 min1 = pAABB[unsorted_indx2*2 + 0]; + float4 max1 = pAABB[unsorted_indx2*2 + 1]; + if(testAABBOverlap(min0, max0, min1, max1)) + { + int handleIndex2 = as_int(min1.w); + int k; + for(k = 0; k < curr; k++) + { + int old_pair = pPairBuff[start+k] & (~0x60000000); + if(old_pair == handleIndex2) + { + pPairBuff[start+k] |= 0x40000000; + break; + } + } + if(k == curr) + { + if(curr >= curr_max) + { // not a good solution, but let's avoid crash + break; + } + pPairBuff[start+curr] = handleIndex2 | 0x20000000; + curr++; + } + } + } + } + int2 newStartCurr; + newStartCurr.x = start; + newStartCurr.y = curr; + pPairBuffStartCurr[handleIndex] = newStartCurr; + return; +} + +__kernel void kFindOverlappingPairs( int numObjects, + __global float4* pAABB, + __global int2* pHash, + __global int* pCellStart, + __global int* pPairBuff, + __global int2* pPairBuffStartCurr, + __global float4* pParams GUID_ARG) + +{ + int index = get_global_id(0); + if(index >= numObjects) + { + return; + } + int2 sortedData = pHash[index]; + int unsorted_indx = sortedData.y; + float4 bbMin = pAABB[unsorted_indx*2 + 0]; + float4 bbMax = pAABB[unsorted_indx*2 + 1]; + float4 pos; + pos.x = (bbMin.x + bbMax.x) * 0.5f; + pos.y = (bbMin.y + bbMax.y) * 0.5f; + pos.z = (bbMin.z + bbMax.z) * 0.5f; + // get address in grid + int4 gridPosA = getGridPos(pos, pParams); + int4 gridPosB; + // examine only neighbouring cells + for(int z=-1; z<=1; z++) + { + gridPosB.z = gridPosA.z + z; + for(int y=-1; y<=1; y++) + { + gridPosB.y = gridPosA.y + y; + for(int x=-1; x<=1; x++) + 
{ + gridPosB.x = gridPosA.x + x; + findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, pParams); + } + } + } +} + + +__kernel void kFindPairsLarge( int numObjects, + __global float4* pAABB, + __global int2* pHash, + __global int* pCellStart, + __global int* pPairBuff, + __global int2* pPairBuffStartCurr, + uint numLarge GUID_ARG) +{ + int index = get_global_id(0); + if(index >= numObjects) + { + return; + } + int2 sortedData = pHash[index]; + int unsorted_indx = sortedData.y; + float4 min0 = pAABB[unsorted_indx*2 + 0]; + float4 max0 = pAABB[unsorted_indx*2 + 1]; + int handleIndex = as_int(min0.w); + int2 start_curr = pPairBuffStartCurr[handleIndex]; + int start = start_curr.x; + int curr = start_curr.y; + int2 start_curr_next = pPairBuffStartCurr[handleIndex+1]; + int curr_max = start_curr_next.x - start - 1; + for(uint i = 0; i < numLarge; i++) + { + int indx2 = numObjects + i; + float4 min1 = pAABB[indx2*2 + 0]; + float4 max1 = pAABB[indx2*2 + 1]; + if(testAABBOverlap(min0, max0, min1, max1)) + { + int k; + int handleIndex2 = as_int(min1.w); + for(k = 0; k < curr; k++) + { + int old_pair = pPairBuff[start+k] & (~0x60000000); + if(old_pair == handleIndex2) + { + pPairBuff[start+k] |= 0x40000000; + break; + } + } + if(k == curr) + { + pPairBuff[start+curr] = handleIndex2 | 0x20000000; + if(curr >= curr_max) + { // not a good solution, but let's avoid crash + break; + } + curr++; + } + } + } + int2 newStartCurr; + newStartCurr.x = start; + newStartCurr.y = curr; + pPairBuffStartCurr[handleIndex] = newStartCurr; + return; +} + +__kernel void kComputePairCacheChanges( int numObjects, + __global int* pPairBuff, + __global int2* pPairBuffStartCurr, + __global int* pPairScan, + __global float4* pAABB GUID_ARG) +{ + int index = get_global_id(0); + if(index >= numObjects) + { + return; + } + float4 bbMin = pAABB[index * 2]; + int handleIndex = as_int(bbMin.w); + int2 start_curr = pPairBuffStartCurr[handleIndex]; + int start 
= start_curr.x; + int curr = start_curr.y; + __global int *pInp = pPairBuff + start; + int num_changes = 0; + for(int k = 0; k < curr; k++, pInp++) + { + if(!((*pInp) & 0x40000000)) + { + num_changes++; + } + } + pPairScan[index+1] = num_changes; +} + +__kernel void kSqueezeOverlappingPairBuff( int numObjects, + __global int* pPairBuff, + __global int2* pPairBuffStartCurr, + __global int* pPairScan, + __global int* pPairOut, + __global float4* pAABB GUID_ARG) +{ + int index = get_global_id(0); + if(index >= numObjects) + { + return; + } + float4 bbMin = pAABB[index * 2]; + int handleIndex = as_int(bbMin.w); + int2 start_curr = pPairBuffStartCurr[handleIndex]; + int start = start_curr.x; + int curr = start_curr.y; + __global int* pInp = pPairBuff + start; + __global int* pOut = pPairOut + pPairScan[index+1]; + __global int* pOut2 = pInp; + int num = 0; + for(int k = 0; k < curr; k++, pInp++) + { + if(!((*pInp) & 0x40000000)) + { + *pOut = *pInp; + pOut++; + } + if((*pInp) & 0x60000000) + { + *pOut2 = (*pInp) & (~0x60000000); + pOut2++; + num++; + } + } + int2 newStartCurr; + newStartCurr.x = start; + newStartCurr.y = num; + pPairBuffStartCurr[handleIndex] = newStartCurr; +} + + + + +); \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp new file mode 100644 index 000000000..7229b8e95 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp @@ -0,0 +1,697 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#include "LinearMath/btAlignedAllocator.h" +#include "LinearMath/btQuickprof.h" +#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" +#include "../basic_initialize/btOpenCLUtils.h" + +#include "bt3dGridBroadphaseOCL.h" + +#include +#include +#include "Adl/Adl.h" +#include +#include +#include + +#define ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + +#define GRID_OCL_PATH "..\\..\\opencl\\3dGridBroadphase\\Shared\\bt3dGridBroadphaseOCL.cl" + + +#define MSTRINGIFY(A) #A + +static const char* spProgramSource = +#include "bt3dGridBroadphaseOCL.cl" + +adl::PrefixScan::Data* gData1=0; +adl::Buffer* m_srcClBuffer=0; + +struct MySortData +{ + int key; + int value; +}; + +adl::RadixSort32::Data* dataC = 0; +adl::RadixSort::Data* dataHost = 0; + + +static unsigned int infElem = 0x2fffffff; + +static unsigned int zeroEl = 0; +static unsigned int minusOne= -1; + + +bt3dGridBroadphaseOCL::bt3dGridBroadphaseOCL( btOverlappingPairCache* overlappingPairCache, + const btVector3& cellSize, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy, + btScalar maxSmallProxySize, + int maxSmallProxiesPerCell, + cl_context context, cl_device_id device, cl_command_queue queue, + adl::DeviceCL* deviceCL + ) : + btGpu3DGridBroadphase(overlappingPairCache, cellSize, gridSizeX, gridSizeY, 
gridSizeZ, maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy, maxSmallProxySize, maxSmallProxiesPerCell) +{ + + + initCL(context, device, queue); + allocateBuffers(); + + prefillBuffers(); + + initKernels(); + + //create an Adl device host and OpenCL device + + adl::DeviceUtils::Config cfg; + m_deviceHost = adl::DeviceUtils::allocate( adl::TYPE_HOST, cfg ); + m_ownsDevice = false; + if (!deviceCL) + { + m_ownsDevice = true; + deviceCL = new adl::DeviceCL; + deviceCL->m_context = context; + deviceCL->m_deviceIdx = device; + deviceCL->m_commandQueue = queue; + deviceCL->m_kernelManager = new adl::KernelManager; + } + + m_deviceCL = deviceCL; + + int minSize = 256*1024; + int maxSortBuffer = maxSmallProxies < minSize ? minSize :maxSmallProxies; + + m_srcClBuffer = new adl::Buffer (m_deviceCL,maxSmallProxies+2); + m_srcClBuffer->write(&zeroEl,1,0); + + //m_srcClBuffer->write(&infElem,maxSmallProxies,0); + m_srcClBuffer->write(&infElem,1,maxSmallProxies); + m_srcClBuffer->write(&zeroEl,1,maxSmallProxies+1); + m_deviceCL->waitForCompletion(); + + gData1 = adl::PrefixScan::allocate( m_deviceCL, maxSortBuffer+2,adl::PrefixScanBase::EXCLUSIVE ); + dataHost = adl::RadixSort::allocate( m_deviceHost, maxSmallProxies+2 ); + dataC = adl::RadixSort32::allocate( m_deviceCL, maxSortBuffer+2 ); + +} + + + +bt3dGridBroadphaseOCL::~bt3dGridBroadphaseOCL() +{ + //btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache + assert(m_bInitialized); + adl::RadixSort::deallocate(dataHost); + adl::PrefixScan::deallocate(gData1); + adl::RadixSort32::deallocate(dataC); + adl::DeviceUtils::deallocate(m_deviceHost); + delete m_srcClBuffer; + if (m_ownsDevice) + { + delete m_deviceCL->m_kernelManager; + delete m_deviceCL; + } +} + +#ifdef CL_PLATFORM_MINI_CL +// there is a problem with MSVC9 : static constructors are not called if variables defined in library and are not used +// looks like it is because of optimization +// probably this will happen with 
other compilers as well +// so to make it robust, register kernels again (it is safe) +#define MINICL_DECLARE(a) extern "C" void a(); +MINICL_DECLARE(kCalcHashAABB) +MINICL_DECLARE(kClearCellStart) +MINICL_DECLARE(kFindCellStart) +MINICL_DECLARE(kFindOverlappingPairs) +MINICL_DECLARE(kFindPairsLarge) +MINICL_DECLARE(kComputePairCacheChanges) +MINICL_DECLARE(kSqueezeOverlappingPairBuff) +#undef MINICL_DECLARE +#endif + +void bt3dGridBroadphaseOCL::initCL(cl_context context, cl_device_id device, cl_command_queue queue) +{ + + #ifdef CL_PLATFORM_MINI_CL + // call constructors here + MINICL_REGISTER(kCalcHashAABB) + MINICL_REGISTER(kClearCellStart) + MINICL_REGISTER(kFindCellStart) + MINICL_REGISTER(kFindOverlappingPairs) + MINICL_REGISTER(kFindPairsLarge) + MINICL_REGISTER(kComputePairCacheChanges) + MINICL_REGISTER(kSqueezeOverlappingPairBuff) + #endif + + cl_int ciErrNum; + + btAssert(context); + m_cxMainContext = context; + btAssert(device); + m_cdDevice = device; + btAssert(queue); + m_cqCommandQue = queue; + + //adl::Kernel kern = m_deviceCL->getKernel(fileName,funcName,options,src); + + m_cpProgram = btOpenCLUtils::compileCLProgramFromString(m_cxMainContext,m_cdDevice,spProgramSource, &ciErrNum,"-DGUID_ARG=""""",GRID_OCL_PATH); + + printf("OK\n"); +} + + +void bt3dGridBroadphaseOCL::initKernels() +{ + initKernel(GRID3DOCL_KERNEL_CALC_HASH_AABB, "kCalcHashAABB"); + setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 1, sizeof(cl_mem),(void*)&m_dAABB); + setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 2, sizeof(cl_mem),(void*)&m_dBodiesHash); + setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 3, sizeof(cl_mem),(void*)&m_dBpParams); + + initKernel(GRID3DOCL_KERNEL_CLEAR_CELL_START, "kClearCellStart"); + setKernelArg(GRID3DOCL_KERNEL_CLEAR_CELL_START, 1, sizeof(cl_mem),(void*)&m_dCellStart); + + initKernel(GRID3DOCL_KERNEL_FIND_CELL_START, "kFindCellStart"); + setKernelArg(GRID3DOCL_KERNEL_FIND_CELL_START, 1, sizeof(cl_mem),(void*)&m_dBodiesHash); + 
setKernelArg(GRID3DOCL_KERNEL_FIND_CELL_START, 2, sizeof(cl_mem),(void*)&m_dCellStart); + + initKernel(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, "kFindOverlappingPairs"); + setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 1, sizeof(cl_mem),(void*)&m_dAABB); + setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 2, sizeof(cl_mem),(void*)&m_dBodiesHash); + setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 3, sizeof(cl_mem),(void*)&m_dCellStart); + setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 4, sizeof(cl_mem),(void*)&m_dPairBuff); + setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 5, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr); + setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 6, sizeof(cl_mem),(void*)&m_dBpParams); + + initKernel(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, "kFindPairsLarge"); + setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 1, sizeof(cl_mem),(void*)&m_dAABB); + setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 2, sizeof(cl_mem),(void*)&m_dBodiesHash); + setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 3, sizeof(cl_mem),(void*)&m_dCellStart); + setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 4, sizeof(cl_mem),(void*)&m_dPairBuff); + setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 5, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr); + + initKernel(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, "kComputePairCacheChanges"); + setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 1, sizeof(cl_mem),(void*)&m_dPairBuff); + setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr); + setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged); + setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 4, sizeof(cl_mem),(void*)&m_dAABB); + + initKernel(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, "kSqueezeOverlappingPairBuff"); + setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 1, sizeof(cl_mem),(void*)&m_dPairBuff); + setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 2, 
sizeof(cl_mem),(void*)&m_dPairBuffStartCurr); + setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged); + setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 4, sizeof(cl_mem),(void*)&m_dPairsChanged); + setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 5, sizeof(cl_mem),(void*)&m_dAABB); + +} + + +void bt3dGridBroadphaseOCL::allocateBuffers() +{ + cl_int ciErrNum; + unsigned int memSize; + // current version of bitonic sort works for power of 2 arrays only, so ... + m_hashSize = 1; + for(int bit = 1; bit < 32; bit++) + { + if(m_hashSize >= m_maxHandles) + { + break; + } + m_hashSize <<= 1; + } + memSize = m_hashSize * 2 * sizeof(unsigned int); + if (memSize < 1024*1024) + memSize = 1024*1024; + + m_dBodiesHash = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + + memSize = m_numCells * sizeof(unsigned int); + m_dCellStart = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + + memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int); + m_dPairBuff = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + + memSize = (m_maxHandles * 2 + 1) * sizeof(unsigned int); + m_dPairBuffStartCurr = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + + unsigned int numAABB = m_maxHandles + m_maxLargeHandles; + memSize = numAABB * sizeof(float) * 4 * 2; + m_dAABB = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + + memSize = (m_maxHandles + 2) * sizeof(unsigned int); + m_dPairScanChanged = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + + memSize = m_maxHandles * m_maxPairsPerBody * 
sizeof(unsigned int); + m_dPairsChanged = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + + m_dPairsContiguous = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + + memSize = 3 * 4 * sizeof(float); + m_dBpParams = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); +} + +void bt3dGridBroadphaseOCL::prefillBuffers() +{ + memset(m_hBodiesHash, 0xFF, m_maxHandles*2*sizeof(unsigned int)); + copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_maxHandles * 2 * sizeof(unsigned int)); + // now fill the rest (bitonic sorting works with size == pow of 2) + int remainder = m_hashSize - m_maxHandles; + if(remainder) + { + copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, remainder * 2 * sizeof(unsigned int), m_maxHandles * 2 * sizeof(unsigned int), 0); + } + copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int)); + memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); + copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); +} + + +void bt3dGridBroadphaseOCL::initKernel(int kernelId, char* pName) +{ + + cl_int ciErrNum; + cl_kernel kernel = clCreateKernel(m_cpProgram, pName, &ciErrNum); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + size_t wgSize; + ciErrNum = clGetKernelWorkGroupInfo(kernel, m_cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + m_kernels[kernelId].m_Id = kernelId; + m_kernels[kernelId].m_kernel = kernel; + m_kernels[kernelId].m_name = pName; + m_kernels[kernelId].m_workgroupSize = (int)wgSize; + return; +} + +void bt3dGridBroadphaseOCL::runKernelWithWorkgroupSize(int kernelId, int globalSize) +{ + if(globalSize <= 0) + { + return; + } + cl_kernel 
kernelFunc = m_kernels[kernelId].m_kernel; + cl_int ciErrNum = clSetKernelArg(kernelFunc, 0, sizeof(int), (void*)&globalSize); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + int workgroupSize = btMin(64,m_kernels[kernelId].m_workgroupSize); + + if(workgroupSize <= 0) + { // let OpenCL library calculate workgroup size + size_t globalWorkSize[2]; + globalWorkSize[0] = globalSize; + globalWorkSize[1] = 1; + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, NULL, 0,0,0 ); + } + else + { + size_t localWorkSize[2], globalWorkSize[2]; + //workgroupSize = btMin(workgroupSize, globalSize); + int num_t = globalSize / workgroupSize; + int num_g = num_t * workgroupSize; + if(num_g < globalSize) + { + num_t++; + } + localWorkSize[0] = workgroupSize; + globalWorkSize[0] = num_t * workgroupSize; + localWorkSize[1] = 1; + globalWorkSize[1] = 1; + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, localWorkSize, 0,0,0 ); + } + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clFlush(m_cqCommandQue); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); +} + + +void bt3dGridBroadphaseOCL::setKernelArg(int kernelId, int argNum, int argSize, void* argPtr) +{ + cl_int ciErrNum; + ciErrNum = clSetKernelArg(m_kernels[kernelId].m_kernel, argNum, argSize, argPtr); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); +} + + +void bt3dGridBroadphaseOCL::copyArrayToDevice(cl_mem device, const void* host, unsigned int size, int devOffs, int hostOffs) +{ + if (size) + { + cl_int ciErrNum; + char* pHost = (char*)host + hostOffs; + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, pHost, 0, NULL, NULL); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + } +} + +void bt3dGridBroadphaseOCL::copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs, int devOffs) +{ + if (size) + { + cl_int ciErrNum; + char* pHost = (char*)host + hostOffs; + ciErrNum = 
clEnqueueReadBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, pHost, 0, NULL, NULL); + GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS); + } +} + + + +// +// overrides +// + + +void bt3dGridBroadphaseOCL::prepareAABB() +{ + btGpu3DGridBroadphase::prepareAABB(); + copyArrayToDevice(m_dAABB, m_hAABB, sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles)); + return; +} + +void bt3dGridBroadphaseOCL::setParameters(bt3DGridBroadphaseParams* hostParams) +{ + btGpu3DGridBroadphase::setParameters(hostParams); + struct btParamsBpOCL + { + float m_invCellSize[4]; + int m_gridSize[4]; + }; + btParamsBpOCL hParams; + hParams.m_invCellSize[0] = m_params.m_invCellSizeX; + hParams.m_invCellSize[1] = m_params.m_invCellSizeY; + hParams.m_invCellSize[2] = m_params.m_invCellSizeZ; + hParams.m_invCellSize[3] = 0.f; + hParams.m_gridSize[0] = m_params.m_gridSizeX; + hParams.m_gridSize[1] = m_params.m_gridSizeY; + hParams.m_gridSize[2] = m_params.m_gridSizeZ; + hParams.m_gridSize[3] = m_params.m_maxBodiesPerCell; + copyArrayToDevice(m_dBpParams, &hParams, sizeof(btParamsBpOCL)); + return; +} + + +void bt3dGridBroadphaseOCL::calcHashAABB() +{ + BT_PROFILE("calcHashAABB"); +#if 1 + runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_CALC_HASH_AABB, m_numHandles); +#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + +#else + btGpu3DGridBroadphase::calcHashAABB(); +#endif + + return; +} + + +void bt3dGridBroadphaseOCL::sortHash() +{ + BT_PROFILE("sortHash"); +#ifdef CL_PLATFORM_MINI_CL + //copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int)); + btGpu3DGridBroadphase::sortHash(); + copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_numHandles * 2 * sizeof(unsigned int)); +#else + +//#define USE_HOST +#ifdef USE_HOST + copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int)); + //adl::Buffer keysIn,keysOut,valuesIn,valuesOut; + 
///adl::RadixSort32::execute(dataC,keysIn,keysOut,valuesIn,valuesOut,m_numHandles); + adl::HostBuffer inoutHost; + inoutHost.m_device = m_deviceHost; + inoutHost.m_ptr = (adl::SortData*)m_hBodiesHash; + inoutHost.m_size = m_numHandles; + adl::RadixSort::execute(dataHost, inoutHost,m_numHandles); + copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_numHandles * 2 * sizeof(unsigned int)); +#else + { + clFinish(m_cqCommandQue); + BT_PROFILE("RadixSort32::execute"); + adl::Buffer inout; + inout.m_device = this->m_deviceCL; + inout.m_size = m_numHandles; + inout.m_ptr = (adl::SortData*)m_dBodiesHash; + int actualHandles = m_numHandles; + int dataAlignment = adl::RadixSort32::DATA_ALIGNMENT; + + if (actualHandles%dataAlignment) + { + actualHandles += dataAlignment-(actualHandles%dataAlignment); + } + + adl::RadixSort32::execute(dataC,inout, actualHandles); +#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + } + { + //BT_PROFILE("copyArrayFromDevice"); + //copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int)); +#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + } + + +#endif //USE_HOST +#endif + + return; +} + + + +void bt3dGridBroadphaseOCL::findCellStart() +{ +#if 1 + BT_PROFILE("findCellStart"); + + #if defined(CL_PLATFORM_MINI_CL) + btGpu3DGridBroadphase::findCellStart(); + copyArrayToDevice(m_dCellStart, m_hCellStart, m_numCells * sizeof(unsigned int)); + #else + runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_CLEAR_CELL_START, m_numCells); + runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_CELL_START, m_numHandles); + #endif +#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif + +#else + btGpu3DGridBroadphase::findCellStart(); +#endif + + return; +} + + + +void bt3dGridBroadphaseOCL::findOverlappingPairs() +{ +#if 1 + BT_PROFILE("findOverlappingPairs"); + 
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, m_numHandles); +#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif + +#else + btGpu3DGridBroadphase::findOverlappingPairs(); + copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int)); + copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); +#endif + return; +} + + +void bt3dGridBroadphaseOCL::findPairsLarge() +{ + BT_PROFILE("findPairsLarge"); +#if 1 + if(m_numLargeHandles) + { + setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 6, sizeof(int),(void*)&m_numLargeHandles); + runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, m_numHandles); + } +#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif + +#else + btGpu3DGridBroadphase::findPairsLarge(); +#endif + return; +} + + + +void bt3dGridBroadphaseOCL::computePairCacheChanges() +{ + BT_PROFILE("computePairCacheChanges"); +#if 1 + runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, m_numHandles); +#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif + copyArrayFromDevice( m_hPairScanChanged,m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2)); + +#else + btGpu3DGridBroadphase::computePairCacheChanges(); + copyArrayToDevice(m_dPairScanChanged, m_hPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2)); + + +#endif + return; +} + + + + +extern cl_device_type deviceType; + +void bt3dGridBroadphaseOCL::scanOverlappingPairBuff(bool copyToCpu) +{ + + //Intel/CPU version doesn't handlel Adl scan well +#if 0 + { + copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2)); + btGpu3DGridBroadphase::scanOverlappingPairBuff(); + copyArrayToDevice(m_dPairScanChanged, m_hPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2)); + m_numPrefixSum = m_hPairScanChanged[m_numHandles+1]; + 
clFinish(m_cqCommandQue); + //memset(m_hPairScanChanged,0,sizeof(int)*m_maxHandles + 2); + } +#else + { + + // copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2)); + // btGpu3DGridBroadphase::scanOverlappingPairBuff(); + + adl::Buffer destBuffer; + + { + BT_PROFILE("copy GPU->GPU"); + + destBuffer.m_ptr = (unsigned int*)m_dPairScanChanged; + destBuffer.m_device = m_deviceCL; + destBuffer.m_size = sizeof(unsigned int)*(m_numHandles+2); + m_deviceCL->copy(m_srcClBuffer, &destBuffer,m_numHandles,1,1); + +#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif + + } + + { + BT_PROFILE("PrefixScan"); + + adl::PrefixScan::execute(gData1,*m_srcClBuffer,destBuffer, m_numHandles+2,&m_numPrefixSum); + +#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif + //if (m_numPrefixSum>0x1000) + // { + // printf("error m_numPrefixSum==%d\n",m_numPrefixSum); + // } + + } + +#if 0 + unsigned int* verifyhPairScanChanged = new unsigned int[m_maxHandles + 2]; + memset(verifyhPairScanChanged,0,sizeof(int)*m_maxHandles + 2); + + copyArrayFromDevice(verifyhPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2)); + clFinish(m_cqCommandQue); + + /*for (int i=0;i CPU"); + copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2)); +#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif + } + + } + + } +#endif + + +} + + + +void bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff() +{ + BT_PROFILE("btCuda_squeezeOverlappingPairBuff"); +#if 1 + runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, m_numHandles); +// btCuda_squeezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScanChanged, m_dPairsChanged, m_dAABB, m_numHandles); + + //copyArrayFromDevice(m_hPairsChanged, m_dPairsChanged, sizeof(unsigned int) * m_numPrefixSum);//m_hPairScanChanged[m_numHandles+1]); //gSum +#ifdef 
ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK + clFinish(m_cqCommandQue); +#endif + +#else + btGpu3DGridBroadphase::squeezeOverlappingPairBuff(); +#endif + return; +} + + + +void bt3dGridBroadphaseOCL::resetPool(btDispatcher* dispatcher) +{ + btGpu3DGridBroadphase::resetPool(dispatcher); + prefillBuffers(); +} + + diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h new file mode 100644 index 000000000..dee297c29 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h @@ -0,0 +1,146 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + + + +#ifndef BT3DGRIDBROADPHASEOCL_H +#define BT3DGRIDBROADPHASEOCL_H + +#ifdef __APPLE__ +#ifdef USE_MINICL + #include +#else + #include +#endif +//CL_PLATFORM_MINI_CL could be defined in build system +#else +//#include +// standard utility and system includes +#ifdef USE_MINICL + #include +#else + #include +#endif +// Extra CL/GL include +//#include +#endif //__APPLE__ + +namespace adl +{ + struct Device; + struct DeviceCL; +}; + +#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h" +#include "btGpu3DGridBroadphaseSharedTypes.h" +#include "btGpu3DGridBroadphase.h" + + +#define GRID3DOCL_CHECKERROR(a, b) if((a)!=(b)) { printf("3D GRID OCL Error : %d\n", (a)); btAssert((a) == (b)); } + +enum +{ + GRID3DOCL_KERNEL_CALC_HASH_AABB = 0, + GRID3DOCL_KERNEL_CLEAR_CELL_START, + GRID3DOCL_KERNEL_FIND_CELL_START, + GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, + GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, + GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, + GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, + GRID3DOCL_KERNEL_TOTAL +}; + +struct bt3dGridOCLKernelInfo +{ + int m_Id; + cl_kernel m_kernel; + char* m_name; + int m_workgroupSize; +}; + + +///The bt3dGridBroadphaseOCL uses OpenCL-capable GPU to compute overlapping pairs + +class bt3dGridBroadphaseOCL : public btGpu3DGridBroadphase +{ +protected: + int m_hashSize; + cl_context m_cxMainContext; + cl_device_id m_cdDevice; + cl_command_queue m_cqCommandQue; + cl_program m_cpProgram; + bt3dGridOCLKernelInfo m_kernels[GRID3DOCL_KERNEL_TOTAL]; + // data buffers + cl_mem m_dBodiesHash; + cl_mem m_dCellStart; + cl_mem m_dPairBuff; + cl_mem m_dPairBuffStartCurr; +public: + cl_mem m_dAABB; +protected: + cl_mem m_dPairScanChanged; + cl_mem m_dPairsChanged; + cl_mem m_dPairsContiguous; + cl_mem m_dBpParams; + + adl::Device* m_deviceHost; + adl::DeviceCL* m_deviceCL; + bool m_ownsDevice; + + +public: + unsigned int m_numPrefixSum; + + bt3dGridBroadphaseOCL( btOverlappingPairCache* overlappingPairCache, + const btVector3& cellSize, + int 
gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy, + btScalar maxSmallProxySize, + int maxSmallProxiesPerCell = 8, + cl_context context = NULL, + cl_device_id device = NULL, + cl_command_queue queue = NULL, + adl::DeviceCL* deviceCL = 0 + ); + virtual ~bt3dGridBroadphaseOCL(); + +protected: + void initCL(cl_context context, cl_device_id device, cl_command_queue queue); + void initKernels(); + void allocateBuffers(); + void prefillBuffers(); + void initKernel(int kernelId, char* pName); + void allocateArray(void** devPtr, unsigned int size); + void freeArray(void* devPtr); + void runKernelWithWorkgroupSize(int kernelId, int globalSize); + void setKernelArg(int kernelId, int argNum, int argSize, void* argPtr); + void copyArrayToDevice(cl_mem device, const void* host, unsigned int size, int devOffs = 0, int hostOffs = 0); + void copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs = 0, int devOffs = 0); + +// overrides + virtual void setParameters(bt3DGridBroadphaseParams* hostParams); + virtual void prepareAABB(); + virtual void calcHashAABB(); + virtual void sortHash(); + virtual void findCellStart(); + virtual void findOverlappingPairs(); + virtual void findPairsLarge(); + virtual void computePairCacheChanges(); + virtual void scanOverlappingPairBuff(bool copyToCpu=true); + virtual void squeezeOverlappingPairBuff(); + virtual void resetPool(btDispatcher* dispatcher); +}; + +#endif //BT3DGRIDBROADPHASEOCL_H \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp new file mode 100644 index 000000000..3ecb6a2fb --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp @@ -0,0 +1,626 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org 
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +///The 3 following lines include the CPU implementation of the kernels, keep them in this order. +#include "btGpuDefines.h" +#include "btGpuUtilsSharedDefs.h" +#include "btGpuUtilsSharedCode.h" + + + +#include "LinearMath/btAlignedAllocator.h" +#include "LinearMath/btQuickprof.h" +#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" + + + +#include "btGpuDefines.h" +#include "btGpuUtilsSharedDefs.h" + +#include "btGpu3DGridBroadphaseSharedDefs.h" + +#include "btGpu3DGridBroadphase.h" +#include //for memset + + +#include + + + +static bt3DGridBroadphaseParams s3DGridBroadphaseParams; + + + +btGpu3DGridBroadphase::btGpu3DGridBroadphase( const btVector3& cellSize, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + btScalar maxSmallProxySize, + int maxBodiesPerCell) : + btSimpleBroadphase(maxSmallProxies, +// new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache), + new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache), + m_bInitialized(false), + 
m_numBodies(0) +{ + _initialize(cellSize, gridSizeX, gridSizeY, gridSizeZ, + maxSmallProxies, maxLargeProxies, maxPairsPerBody, + maxSmallProxySize, maxBodiesPerCell); +} + + + +btGpu3DGridBroadphase::btGpu3DGridBroadphase( btOverlappingPairCache* overlappingPairCache, + const btVector3& cellSize, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + btScalar maxSmallProxySize, + int maxBodiesPerCell) : + btSimpleBroadphase(maxSmallProxies, overlappingPairCache), + m_bInitialized(false), + m_numBodies(0) +{ + _initialize(cellSize, gridSizeX, gridSizeY, gridSizeZ, + maxSmallProxies, maxLargeProxies, maxPairsPerBody, + maxSmallProxySize, maxBodiesPerCell); +} + + + +btGpu3DGridBroadphase::~btGpu3DGridBroadphase() +{ + //btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache + assert(m_bInitialized); + _finalize(); + + +} + +// returns 2^n : 2^(n+1) > val >= 2^n +int btGpu3DGridBroadphase::getFloorPowOfTwo(int val) +{ + int mask = 0x40000000; + for(int k = 0; k < 30; k++, mask >>= 1) + { + if(mask & val) + { + break; + } + } + return mask; +} + + + +void btGpu3DGridBroadphase::_initialize( const btVector3& cellSize, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + btScalar maxSmallProxySize, + int maxBodiesPerCell) +{ + // set various paramerers + m_ownsPairCache = true; + m_params.m_gridSizeX = getFloorPowOfTwo(gridSizeX); + m_params.m_gridSizeY = getFloorPowOfTwo(gridSizeY); + m_params.m_gridSizeZ = getFloorPowOfTwo(gridSizeZ); + m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ; + m_numCells = m_params.m_numCells; + m_params.m_invCellSizeX = btScalar(1.f) / cellSize[0]; + m_params.m_invCellSizeY = btScalar(1.f) / cellSize[1]; + m_params.m_invCellSizeZ = btScalar(1.f) / cellSize[2]; + m_maxRadius = maxSmallProxySize * btScalar(0.5f); + m_params.m_numBodies = 
m_numBodies; + m_params.m_maxBodiesPerCell = maxBodiesPerCell; + + m_numLargeHandles = 0; + m_maxLargeHandles = maxLargeProxies; + + m_maxPairsPerBody = maxPairsPerBody; + + m_LastLargeHandleIndex = -1; + + assert(!m_bInitialized); + + // allocate host storage + m_hBodiesHash = new unsigned int[m_maxHandles * 2]; + memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int)); + + m_hCellStart = new unsigned int[m_params.m_numCells]; + memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int)); + + m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2]; + // --------------- for now, init with m_maxPairsPerBody for each body + m_hPairBuffStartCurr[0] = 0; + m_hPairBuffStartCurr[1] = 0; + for(int i = 1; i <= m_maxHandles; i++) + { + m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody; + m_hPairBuffStartCurr[i * 2 + 1] = 0; + } + //---------------- + unsigned int numAABB = m_maxHandles + m_maxLargeHandles; + m_hAABB = new bt3DGrid3F1U[numAABB * 2]; // AABB Min & Max + + m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody]; + memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed? 
+ + m_hPairScanChanged = new unsigned int[m_maxHandles + 2]; + memset(m_hPairScanChanged,0,sizeof(unsigned int)*(m_maxHandles + 2)); // byte count must cover the whole array, including the 2 extra entries + + m_hPairsChanged = new unsigned int[m_maxHandles * m_maxPairsPerBody]; + memset(m_hPairsChanged,0,sizeof(int)*(m_maxHandles * m_maxPairsPerBody)); + + m_hAllOverlappingPairs= new MyUint2[m_maxHandles * m_maxPairsPerBody]; + memset(m_hAllOverlappingPairs,0,sizeof(MyUint2)*(m_maxHandles * m_maxPairsPerBody)); + + +// large proxies + + // allocate handles buffer and put all handles on free list + m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16); + m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles]; + m_firstFreeLargeHandle = 0; + { + for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++) + { + m_pLargeHandles[i].SetNextFree(i + 1); + m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i; + } + if (m_maxLargeHandles > 0) m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0); // guard: with zero large proxies this would write to index -1 + } + +// debug data + m_numPairsAdded = 0; + m_numOverflows = 0; + + + m_bInitialized = true; +} + + + +void btGpu3DGridBroadphase::_finalize() +{ + assert(m_bInitialized); + delete [] m_hBodiesHash; + delete [] m_hCellStart; + delete [] m_hPairBuffStartCurr; + delete [] m_hAABB; + delete [] m_hPairBuff; + delete [] m_hPairScanChanged; + delete [] m_hPairsChanged; + delete [] m_hAllOverlappingPairs; + btAlignedFree(m_pLargeHandlesRawPtr); + m_bInitialized = false; +} + + + +void btGpu3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher) +{ + btSimpleBroadphase::calculateOverlappingPairs(dispatcher); + + if(m_numHandles <= 0) + { + BT_PROFILE("addLarge2LargePairsToCache"); + addLarge2LargePairsToCache(dispatcher); + return; + } + // update constants + { + BT_PROFILE("setParameters"); + setParameters(&m_params); + } + + // prepare AABB array + { + BT_PROFILE("prepareAABB"); + prepareAABB(); + } + // calculate hash + { + BT_PROFILE("calcHashAABB"); + calcHashAABB(); + } + { + BT_PROFILE("sortHash"); + 
// sort bodies based on hash + sortHash(); + } + // find start of each cell + { + BT_PROFILE("findCellStart"); + findCellStart(); + } + { + BT_PROFILE("findOverlappingPairs"); + // findOverlappingPairs (small/small) + findOverlappingPairs(); + } + // findOverlappingPairs (small/large) + { + BT_PROFILE("findPairsLarge"); + findPairsLarge(); + } + // add pairs to CPU cache + { + BT_PROFILE("computePairCacheChanges"); + computePairCacheChanges(); + } + { + BT_PROFILE("scanOverlappingPairBuff"); + scanOverlappingPairBuff(); + } + { + BT_PROFILE("squeezeOverlappingPairBuff"); + squeezeOverlappingPairBuff(); + } + { + BT_PROFILE("addPairsToCache"); + addPairsToCache(dispatcher); + } + // find and add large/large pairs to CPU cache + { + BT_PROFILE("addLarge2LargePairsToCache"); + addLarge2LargePairsToCache(dispatcher); + } + return; +} + + + +void btGpu3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher) +{ + m_numPairsAdded = 0; + m_numPairsRemoved = 0; + for(int i = 0; i < m_numHandles; i++) + { + unsigned int num = m_hPairScanChanged[i+2] - m_hPairScanChanged[i+1]; + if(!num) + { + continue; + } + unsigned int* pInp = m_hPairsChanged + m_hPairScanChanged[i+1]; + unsigned int index0 = m_hAABB[i * 2].uw; + btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index0]; + for(unsigned int j = 0; j < num; j++) + { + unsigned int indx1_s = pInp[j]; + unsigned int index1 = indx1_s & (~BT_3DGRID_PAIR_ANY_FLG); + btSimpleBroadphaseProxy* proxy1; + if(index1 < (unsigned int)m_maxHandles) + { + proxy1 = &m_pHandles[index1]; + } + else + { + index1 -= m_maxHandles; + btAssert((index1 >= 0) && (index1 < (unsigned int)m_maxLargeHandles)); + proxy1 = &m_pLargeHandles[index1]; + } + if(indx1_s & BT_3DGRID_PAIR_NEW_FLG) + { + m_pairCache->addOverlappingPair(proxy0,proxy1); + m_numPairsAdded++; + } + else + { + m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher); + m_numPairsRemoved++; + } + } + } +} + + + +btBroadphaseProxy* btGpu3DGridBroadphase::createProxy( const btVector3& 
aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy) +{ + btBroadphaseProxy* proxy; + bool bIsLarge = isLargeProxy(aabbMin, aabbMax); + if(bIsLarge) + { + if (m_numLargeHandles >= m_maxLargeHandles) + { + ///you have to increase the cell size, so 'large' proxies become 'small' proxies (fitting a cell) + btAssert(0); + return 0; //should never happen, but don't let the game crash ;-) + } + btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2])); + int newHandleIndex = allocLargeHandle(); + proxy = new (&m_pLargeHandles[newHandleIndex])btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy); + } + else + { + proxy = btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy); + } + return proxy; +} + + + +void btGpu3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher) +{ + bool bIsLarge = isLargeProxy(proxy); + if(bIsLarge) + { + + btSimpleBroadphaseProxy* proxy0 = static_cast(proxy); + freeLargeHandle(proxy0); + m_pairCache->removeOverlappingPairsContainingProxy(proxy,dispatcher); + } + else + { + btSimpleBroadphase::destroyProxy(proxy, dispatcher); + } + return; +} + + + +void btGpu3DGridBroadphase::resetPool(btDispatcher* dispatcher) +{ + m_hPairBuffStartCurr[0] = 0; + m_hPairBuffStartCurr[1] = 0; + for(int i = 1; i <= m_maxHandles; i++) + { + m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody; + m_hPairBuffStartCurr[i * 2 + 1] = 0; + } +} + + + +bool btGpu3DGridBroadphase::isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax) +{ + btVector3 diag = aabbMax - aabbMin; + ///use the bounding sphere radius of this bounding box, to include rotation + btScalar radius = diag.length() * btScalar(0.5f); + return 
(radius > m_maxRadius); +} + + + +bool btGpu3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy) +{ + return (proxy->getUid() >= (m_maxHandles+2)); +} + + + +void btGpu3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher) +{ + int i,j; + if (m_numLargeHandles <= 0) + { + return; + } + int new_largest_index = -1; + for(i = 0; i <= m_LastLargeHandleIndex; i++) + { + btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i]; + new_largest_index = i; + for(j = i + 1; j <= m_LastLargeHandleIndex; j++) + { + btSimpleBroadphaseProxy* proxy1 = &m_pLargeHandles[j]; + btAssert(proxy0 != proxy1); + btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0); + btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1); + if(aabbOverlap(p0,p1)) + { + if (!m_pairCache->findPair(proxy0,proxy1)) + { + m_pairCache->addOverlappingPair(proxy0,proxy1); + } + } + else + { + if(m_pairCache->findPair(proxy0,proxy1)) + { + m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher); + } + } + } + } + m_LastLargeHandleIndex = new_largest_index; + return; +} + + + +void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback) +{ + btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback); + for (int i=0; i <= m_LastLargeHandleIndex; i++) + { + btSimpleBroadphaseProxy* proxy = &m_pLargeHandles[i]; + rayCallback.process(proxy); + } +} + + + +// +// overrides for CPU version +// + + + +void btGpu3DGridBroadphase::prepareAABB() +{ + BT_PROFILE("prepareAABB"); + bt3DGrid3F1U* pBB = m_hAABB; + int i; + int new_largest_index = -1; + unsigned int num_small = 0; + for(i = 0; i <= m_LastHandleIndex; i++) + { + btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i]; + new_largest_index = i; + pBB->fx = proxy0->m_aabbMin.getX(); + pBB->fy = proxy0->m_aabbMin.getY(); + pBB->fz = proxy0->m_aabbMin.getZ(); + pBB->uw = i; + pBB++; + pBB->fx = proxy0->m_aabbMax.getX(); + pBB->fy = proxy0->m_aabbMax.getY(); + pBB->fz = 
proxy0->m_aabbMax.getZ(); + pBB->uw = num_small; + pBB++; + num_small++; + } + m_LastHandleIndex = new_largest_index; + new_largest_index = -1; + unsigned int num_large = 0; + for(i = 0; i <= m_LastLargeHandleIndex; i++) + { + btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i]; + new_largest_index = i; + pBB->fx = proxy0->m_aabbMin.getX(); + pBB->fy = proxy0->m_aabbMin.getY(); + pBB->fz = proxy0->m_aabbMin.getZ(); + pBB->uw = i + m_maxHandles; + pBB++; + pBB->fx = proxy0->m_aabbMax.getX(); + pBB->fy = proxy0->m_aabbMax.getY(); + pBB->fz = proxy0->m_aabbMax.getZ(); + pBB->uw = num_large + m_maxHandles; + pBB++; + num_large++; + } + m_LastLargeHandleIndex = new_largest_index; + // paranoid checks + btAssert(num_small == m_numHandles); + btAssert(num_large == m_numLargeHandles); + return; +} + + + +void btGpu3DGridBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams) +{ + s3DGridBroadphaseParams = *hostParams; + return; +} + + + +void btGpu3DGridBroadphase::calcHashAABB() +{ + BT_PROFILE("bt3DGrid_calcHashAABB"); + btGpu_calcHashAABB(m_hAABB, m_hBodiesHash, m_numHandles); + return; +} + + + +void btGpu3DGridBroadphase::sortHash() +{ + class bt3DGridHashKey + { + public: + unsigned int hash; + unsigned int index; + void quickSort(bt3DGridHashKey* pData, int lo, int hi) + { + int i=lo, j=hi; + bt3DGridHashKey x = pData[(lo+hi)/2]; + do + { + while(pData[i].hash > x.hash) i++; + while(x.hash > pData[j].hash) j--; + if(i <= j) + { + bt3DGridHashKey t = pData[i]; + pData[i] = pData[j]; + pData[j] = t; + i++; j--; + } + } while(i <= j); + if(lo < j) pData->quickSort(pData, lo, j); + if(i < hi) pData->quickSort(pData, i, hi); + } + }; + BT_PROFILE("bt3DGrid_sortHash"); + bt3DGridHashKey* pHash = (bt3DGridHashKey*)m_hBodiesHash; + pHash->quickSort(pHash, 0, m_numHandles - 1); + return; +} + + + +void btGpu3DGridBroadphase::findCellStart() +{ + BT_PROFILE("bt3DGrid_findCellStart"); + btGpu_findCellStart(m_hBodiesHash, m_hCellStart, m_numHandles, 
m_params.m_numCells); + return; +} + + + +void btGpu3DGridBroadphase::findOverlappingPairs() +{ + BT_PROFILE("bt3DGrid_findOverlappingPairs"); + btGpu_findOverlappingPairs(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles); + return; +} + + + +void btGpu3DGridBroadphase::findPairsLarge() +{ + BT_PROFILE("bt3DGrid_findPairsLarge"); + btGpu_findPairsLarge(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles, m_numLargeHandles); + return; +} + + + +void btGpu3DGridBroadphase::computePairCacheChanges() +{ + BT_PROFILE("bt3DGrid_computePairCacheChanges"); + btGpu_computePairCacheChanges(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, m_hAABB, m_numHandles); + return; +} + + +void btGpu3DGridBroadphase::scanOverlappingPairBuff(bool copyToCpu) +{ + BT_PROFILE("bt3DGrid_scanOverlappingPairBuff"); + unsigned int sum = 0; + m_hPairScanChanged[0]=0; + for(int i = 0; i <= m_numHandles+1; i++) + { + unsigned int delta = m_hPairScanChanged[i]; + m_hPairScanChanged[i] = sum; + sum += delta; + } + return; +} + + + +void btGpu3DGridBroadphase::squeezeOverlappingPairBuff() +{ + BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff"); + //btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, m_hPairsChanged, m_hAABB, m_numHandles); + btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, (unsigned int*)m_hAllOverlappingPairs, m_hAABB, m_numHandles); + + return; +} + + + +#include "btGpu3DGridBroadphaseSharedCode.h" + diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphase.h b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphase.h new file mode 100644 index 000000000..8441151f7 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphase.h @@ -0,0 +1,154 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org 
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//---------------------------------------------------------------------------------------- + +#ifndef BTGPU3DGRIDBROADPHASE_H +#define BTGPU3DGRIDBROADPHASE_H + +//---------------------------------------------------------------------------------------- + +#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h" + +#include "btGpu3DGridBroadphaseSharedTypes.h" +struct MyUint2 +{ + int x; + int y; +}; + +//---------------------------------------------------------------------------------------- + +///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs + +class btGpu3DGridBroadphase : public btSimpleBroadphase +{ +protected: + bool m_bInitialized; + unsigned int m_numBodies; + unsigned int m_numCells; + unsigned int m_maxPairsPerBody; + unsigned int m_maxBodiesPerCell; + bt3DGridBroadphaseParams m_params; + btScalar m_maxRadius; + // CPU data + unsigned int* m_hBodiesHash; + unsigned int* m_hCellStart; + unsigned int* m_hPairBuffStartCurr; + bt3DGrid3F1U* m_hAABB; + unsigned int* m_hPairBuff; + unsigned int* m_hPairScanChanged; + unsigned int* 
m_hPairsChanged; + MyUint2* m_hAllOverlappingPairs; +// large proxies + int m_numLargeHandles; + int m_maxLargeHandles; + int m_LastLargeHandleIndex; + btSimpleBroadphaseProxy* m_pLargeHandles; + void* m_pLargeHandlesRawPtr; + int m_firstFreeLargeHandle; + int allocLargeHandle() + { + btAssert(m_numLargeHandles < m_maxLargeHandles); + int freeLargeHandle = m_firstFreeLargeHandle; + m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree(); + m_numLargeHandles++; + if(freeLargeHandle > m_LastLargeHandleIndex) + { + m_LastLargeHandleIndex = freeLargeHandle; + } + return freeLargeHandle; + } + void freeLargeHandle(btSimpleBroadphaseProxy* proxy) + { + int handle = int(proxy - m_pLargeHandles); + btAssert((handle >= 0) && (handle < m_maxLargeHandles)); // handle indexes m_pLargeHandles, so it is bounded by the large-handle capacity + if(handle == m_LastLargeHandleIndex) + { + m_LastLargeHandleIndex--; + } + proxy->SetNextFree(m_firstFreeLargeHandle); + m_firstFreeLargeHandle = handle; + proxy->m_clientObject = 0; + m_numLargeHandles--; + } + bool isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax); + bool isLargeProxy(btBroadphaseProxy* proxy); +// debug + unsigned int m_numPairsAdded; + unsigned int m_numPairsRemoved; + unsigned int m_numOverflows; +// +public: + virtual int getNumOverlap() + { + return m_hPairScanChanged[m_numHandles+1]; + } + virtual MyUint2* getOverlap() + { + return m_hAllOverlappingPairs; + } + // NOTE : for better results gridSizeX, gridSizeY and gridSizeZ should be powers of 2 + btGpu3DGridBroadphase(const btVector3& cellSize, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + btScalar maxSmallProxySize, + int maxBodiesPerCell = 8); + btGpu3DGridBroadphase( btOverlappingPairCache* overlappingPairCache, + const btVector3& cellSize, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + btScalar maxSmallProxySize, + int maxBodiesPerCell = 8); + virtual ~btGpu3DGridBroadphase(); + 
virtual void calculateOverlappingPairs(btDispatcher* dispatcher); + + virtual btBroadphaseProxy* createProxy(const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy); + virtual void destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher); + virtual void rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback); + virtual void resetPool(btDispatcher* dispatcher); + + static int getFloorPowOfTwo(int val); // returns 2^n : 2^(n+1) > val >= 2^n + +protected: + void _initialize( const btVector3& cellSize, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + btScalar maxSmallProxySize, + int maxBodiesPerCell); + void _finalize(); + void addPairsToCache(btDispatcher* dispatcher); + void addLarge2LargePairsToCache(btDispatcher* dispatcher); + +// overrides for CPU version + virtual void setParameters(bt3DGridBroadphaseParams* hostParams); + virtual void prepareAABB(); + virtual void calcHashAABB(); + virtual void sortHash(); + virtual void findCellStart(); + virtual void findOverlappingPairs(); + virtual void findPairsLarge(); + virtual void computePairCacheChanges(); + virtual void scanOverlappingPairBuff(bool copyToCpu=true); + virtual void squeezeOverlappingPairBuff(); +}; + +//---------------------------------------------------------------------------------------- + +#endif //BTGPU3DGRIDBROADPHASE_H + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphaseSharedCode.h 
b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphaseSharedCode.h new file mode 100644 index 000000000..08df68a65 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphaseSharedCode.h @@ -0,0 +1,428 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +//---------------------------------------------------------------------------------------- + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +// K E R N E L F U N C T I O N S +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + +// calculate position in uniform grid +BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p) +{ + int3 gridPos; + gridPos.x = (int)floor(p.x * BT_GPU_params.m_invCellSizeX) & (BT_GPU_params.m_gridSizeX - 1); + gridPos.y = (int)floor(p.y * BT_GPU_params.m_invCellSizeY) & (BT_GPU_params.m_gridSizeY - 1); + gridPos.z = (int)floor(p.z * BT_GPU_params.m_invCellSizeZ) & (BT_GPU_params.m_gridSizeZ - 1); + return gridPos; +} // bt3DGrid_calcGridPos() + +//---------------------------------------------------------------------------------------- + +// calculate address in grid from position (clamping to edges) +BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos) +{ + gridPos.x &= (BT_GPU_params.m_gridSizeX - 1); + gridPos.y &= (BT_GPU_params.m_gridSizeY - 1); + gridPos.z &= (BT_GPU_params.m_gridSizeZ - 1); + return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + 
BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x; +} // bt3DGrid_calcGridHash() + +//---------------------------------------------------------------------------------------- + +// calculate grid hash value for each body using its AABB +BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + bt3DGrid3F1U bbMin = pAABB[index*2]; + bt3DGrid3F1U bbMax = pAABB[index*2 + 1]; + float4 pos; + pos.x = (bbMin.fx + bbMax.fx) * 0.5f; + pos.y = (bbMin.fy + bbMax.fy) * 0.5f; + pos.z = (bbMin.fz + bbMax.fz) * 0.5f; + // get address in grid + int3 gridPos = bt3DGrid_calcGridPos(pos); + uint gridHash = bt3DGrid_calcGridHash(gridPos); + // store grid hash and body index + pHash[index] = BT_GPU_make_uint2(gridHash, index); +} // calcHashAABBD() + +//---------------------------------------------------------------------------------------- + +BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + uint2 sortedData = pHash[index]; + // Load hash data into shared memory so that we can look + // at neighboring body's hash value without loading + // two hash values per thread + BT_GPU___shared__ uint sharedHash[257]; + sharedHash[BT_GPU_threadIdx.x+1] = sortedData.x; + if((index > 0) && (BT_GPU_threadIdx.x == 0)) + { + // first thread in block must load neighbor body hash + volatile uint2 prevData = pHash[index-1]; + sharedHash[0] = prevData.x; + } + BT_GPU___syncthreads(); + if((index == 0) || (sortedData.x != sharedHash[BT_GPU_threadIdx.x])) + { + cellStart[sortedData.x] = index; + } +} // findCellStartD() + +//---------------------------------------------------------------------------------------- + +BT_GPU___device__ uint 
cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1) +{ + return (min0.fx <= max1.fx)&& (min1.fx <= max0.fx) && + (min0.fy <= max1.fy)&& (min1.fy <= max0.fy) && + (min0.fz <= max1.fz)&& (min1.fz <= max0.fz); +} // cudaTestAABBOverlap() + +//---------------------------------------------------------------------------------------- + +BT_GPU___device__ void findPairsInCell( int3 gridPos, + uint index, + uint2* pHash, + uint* pCellStart, + bt3DGrid3F1U* pAABB, + uint* pPairBuff, + uint2* pPairBuffStartCurr, + uint numBodies) +{ + uint gridHash = bt3DGrid_calcGridHash(gridPos); + // get start of bucket for this cell + uint bucketStart = pCellStart[gridHash]; + if (bucketStart == 0xffffffff) + { + return; // cell empty + } + // iterate over bodies in this cell + uint2 sortedData = pHash[index]; + uint unsorted_indx = sortedData.y; + bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2); + bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1); + uint handleIndex = min0.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1]; + uint curr_max = start_curr_next.x - start - 1; + uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell; + bucketEnd = (bucketEnd > numBodies) ? 
numBodies : bucketEnd; + for(uint index2 = bucketStart; index2 < bucketEnd; index2++) + { + uint2 cellData = pHash[index2]; + if (cellData.x != gridHash) + { + break; // no longer in same bucket + } + uint unsorted_indx2 = cellData.y; + if (unsorted_indx2 < unsorted_indx) // check not colliding with self + { + bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2); + bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1); + if(cudaTestAABBOverlap(min0, max0, min1, max1)) + { + uint handleIndex2 = min1.uw; + uint k; + for(k = 0; k < curr; k++) + { + uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG); + if(old_pair == handleIndex2) + { + pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG; + break; + } + } + if(k == curr) + { + if(curr >= curr_max) + { // not a good solution, but let's avoid crash + break; + } + pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG; + curr++; + } + } + } + } + pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr); + return; +} // findPairsInCell() + +//---------------------------------------------------------------------------------------- + +BT_GPU___global__ void findOverlappingPairsD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, + uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + uint2 sortedData = pHash[index]; + uint unsorted_indx = sortedData.y; + bt3DGrid3F1U bbMin = BT_GPU_FETCH(pAABB, unsorted_indx*2); + bt3DGrid3F1U bbMax = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1); + float4 pos; + pos.x = (bbMin.fx + bbMax.fx) * 0.5f; + pos.y = (bbMin.fy + bbMax.fy) * 0.5f; + pos.z = (bbMin.fz + bbMax.fz) * 0.5f; + // get address in grid + int3 gridPos = bt3DGrid_calcGridPos(pos); + // examine only neighbouring cells + for(int z=-1; z<=1; z++) { + for(int y=-1; y<=1; y++) { + for(int x=-1; x<=1; x++) { + findPairsInCell(gridPos + 
BT_GPU_make_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies); + } + } + } +} // findOverlappingPairsD() + +//---------------------------------------------------------------------------------------- + +BT_GPU___global__ void findPairsLargeD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, + uint2* pPairBuffStartCurr, uint numBodies, uint numLarge) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + uint2 sortedData = pHash[index]; + uint unsorted_indx = sortedData.y; + bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2); + bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1); + uint handleIndex = min0.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1]; + uint curr_max = start_curr_next.x - start - 1; + for(uint i = 0; i < numLarge; i++) + { + uint indx2 = numBodies + i; + bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2); + bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1); + if(cudaTestAABBOverlap(min0, max0, min1, max1)) + { + uint k; + uint handleIndex2 = min1.uw; + for(k = 0; k < curr; k++) + { + uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG); + if(old_pair == handleIndex2) + { + pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG; + break; + } + } + if(k == curr) + { + pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG; + if(curr >= curr_max) + { // not a good solution, but let's avoid crash + break; + } + curr++; + } + } + } + pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr); + return; +} // findPairsLargeD() + +//---------------------------------------------------------------------------------------- + +BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, + uint* pPairScan, bt3DGrid3F1U* pAABB, 
uint numBodies) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + bt3DGrid3F1U bbMin = pAABB[index * 2]; + uint handleIndex = bbMin.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint *pInp = pPairBuff + start; + uint num_changes = 0; + for(uint k = 0; k < curr; k++, pInp++) + { + //if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG)) + if(((*pInp) & BT_3DGRID_PAIR_ANY_FLG)) + { + num_changes++; + } + } + pPairScan[index+1] = num_changes; +} // computePairCacheChangesD() + +//---------------------------------------------------------------------------------------- + +BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, + uint2* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + bt3DGrid3F1U bbMin = pAABB[index * 2]; + uint handleIndex = bbMin.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint* pInp = pPairBuff + start; + uint2* pOut = pPairOut + pPairScan[index+1]; + uint* pOut2 = pInp; + uint num = 0; + for(uint k = 0; k < curr; k++, pInp++) + { + if((*pInp) & BT_3DGRID_PAIR_ANY_FLG) + //if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG)) + { + pOut->x = handleIndex; + pOut->y = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG); + + pOut++; + } + if((*pInp) & BT_3DGRID_PAIR_ANY_FLG) + { + *pOut2 = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG); + pOut2++; + num++; + } + } + pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, num); +} // squeezeOverlappingPairBuffD() + + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- 
+//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +// E N D O F K E R N E L F U N C T I O N S +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + +extern "C" +{ + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies) +{ + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); + // execute the kernel + BT_GPU_EXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies)); + // check if kernel invocation generated an error + BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed"); +} // calcHashAABB() + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells)) +{ + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); + BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint))); + BT_GPU_EXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies)); + BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD"); +} // findCellStart() + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(findOverlappingPairs(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* 
pPairBuffStartCurr, unsigned int numBodies)) +{ +#if B_CUDA_USE_TEX + BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U))); +#endif + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads); + BT_GPU_EXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies)); + BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD"); +#if B_CUDA_USE_TEX + BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex)); +#endif +} // findOverlappingPairs() + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(findPairsLarge(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge)) +{ +#if B_CUDA_USE_TEX + BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U))); +#endif + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads); + BT_GPU_EXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge)); + BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD"); +#if B_CUDA_USE_TEX + BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex)); +#endif +} // findPairsLarge() + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies)) +{ + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); + BT_GPU_EXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, 
((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies)); + BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD"); +} // computePairCacheChanges() + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(squeezeOverlappingPairBuff(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies)) +{ + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); + BT_GPU_EXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint2*)pPairOut,pAABB,numBodies)); + BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD"); +} // btCuda_squeezeOverlappingPairBuff() + +//------------------------------------------------------------------------------------------------ + +} // extern "C" + +//------------------------------------------------------------------------------------------------ +//------------------------------------------------------------------------------------------------ +//------------------------------------------------------------------------------------------------ diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphaseSharedDefs.h b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphaseSharedDefs.h new file mode 100644 index 000000000..607bda7ed --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphaseSharedDefs.h @@ -0,0 +1,61 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. 
+In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//---------------------------------------------------------------------------------------- + +// Shared definitions for GPU-based 3D Grid collision detection broadphase + +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// Keep this file free from Bullet headers +// it is included into both CUDA and CPU code +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ +//---------------------------------------------------------------------------------------- + +#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H +#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H + +//---------------------------------------------------------------------------------------- + +#include "btGpu3DGridBroadphaseSharedTypes.h" + +//---------------------------------------------------------------------------------------- + +extern "C" +{ + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies); + +void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells); + +void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies); + +void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge); + +void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies); + +void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies); + + +//---------------------------------------------------------------------------------------- + +} // extern "C" + +//---------------------------------------------------------------------------------------- + +#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H + diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphaseSharedTypes.h 
b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphaseSharedTypes.h new file mode 100644 index 000000000..5b2e65ec0 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpu3DGridBroadphaseSharedTypes.h @@ -0,0 +1,64 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//---------------------------------------------------------------------------------------- + +// Shared definitions for GPU-based 3D Grid collision detection broadphase + +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// Keep this file free from Bullet headers +// it is included into both CUDA and CPU code +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ +//---------------------------------------------------------------------------------------- + +#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H +#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H + +//---------------------------------------------------------------------------------------- + +#define BT_3DGRID_PAIR_FOUND_FLG (0x40000000) +#define BT_3DGRID_PAIR_NEW_FLG (0x20000000) +#define BT_3DGRID_PAIR_ANY_FLG (BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG) + +//---------------------------------------------------------------------------------------- + +struct bt3DGridBroadphaseParams +{ + unsigned int m_gridSizeX; + unsigned int m_gridSizeY; + unsigned int m_gridSizeZ; + unsigned int m_numCells; + float m_invCellSizeX; + float m_invCellSizeY; + float m_invCellSizeZ; + unsigned int m_numBodies; + unsigned int m_maxBodiesPerCell; +}; + +//---------------------------------------------------------------------------------------- + +struct bt3DGrid3F1U +{ + float fx; + float fy; + float fz; + unsigned int uw; +}; + +//---------------------------------------------------------------------------------------- + +#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H + diff --git a/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpuDefines.h b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpuDefines.h new file mode 100644 index 000000000..f9315ab64 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/3dGridBroadphase/Shared/btGpuDefines.h @@ -0,0 +1,211 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + + +// definitions for "GPU on CPU" code + + +#ifndef BT_GPU_DEFINES_H +#define BT_GPU_DEFINES_H + +typedef unsigned int uint; + +struct int2 +{ + int x, y; +}; + +struct uint2 +{ + unsigned int x, y; +}; + +struct int3 +{ + int x, y, z; +}; + +struct uint3 +{ + unsigned int x, y, z; +}; + +struct float4 +{ + float x, y, z, w; +}; + +struct float3 +{ + float x, y, z; +}; + + +#define BT_GPU___device__ inline +#define BT_GPU___devdata__ +#define BT_GPU___constant__ +#define BT_GPU_max(a, b) ((a) > (b) ? (a) : (b)) +#define BT_GPU_min(a, b) ((a) < (b) ? 
(a) : (b)) +#define BT_GPU_params s3DGridBroadphaseParams +#define BT_GPU___mul24(a, b) ((a)*(b)) +#define BT_GPU___global__ inline +#define BT_GPU___shared__ static +#define BT_GPU___syncthreads() +#define CUDART_PI_F SIMD_PI + +static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y) +{ + uint2 t; t.x = x; t.y = y; return t; +} +#define BT_GPU_make_uint2(x, y) bt3dGrid_make_uint2(x, y) + +static inline int3 bt3dGrid_make_int3(int x, int y, int z) +{ + int3 t; t.x = x; t.y = y; t.z = z; return t; +} +#define BT_GPU_make_int3(x, y, z) bt3dGrid_make_int3(x, y, z) + +static inline float3 bt3dGrid_make_float3(float x, float y, float z) +{ + float3 t; t.x = x; t.y = y; t.z = z; return t; +} +#define BT_GPU_make_float3(x, y, z) bt3dGrid_make_float3(x, y, z) + +static inline float3 bt3dGrid_make_float34(float4 f) +{ + float3 t; t.x = f.x; t.y = f.y; t.z = f.z; return t; +} +#define BT_GPU_make_float34(f) bt3dGrid_make_float34(f) + +static inline float3 bt3dGrid_make_float31(float f) +{ + float3 t; t.x = t.y = t.z = f; return t; +} +#define BT_GPU_make_float31(x) bt3dGrid_make_float31(x) + +static inline float4 bt3dGrid_make_float42(float3 v, float f) +{ + float4 t; t.x = v.x; t.y = v.y; t.z = v.z; t.w = f; return t; +} +#define BT_GPU_make_float42(a, b) bt3dGrid_make_float42(a, b) + +static inline float4 bt3dGrid_make_float44(float a, float b, float c, float d) +{ + float4 t; t.x = a; t.y = b; t.z = c; t.w = d; return t; +} +#define BT_GPU_make_float44(a, b, c, d) bt3dGrid_make_float44(a, b, c, d) + +inline int3 operator+(int3 a, int3 b) +{ + return bt3dGrid_make_int3(a.x + b.x, a.y + b.y, a.z + b.z); +} + +inline float4 operator+(const float4& a, const float4& b) +{ + float4 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; r.w = a.w+b.w; return r; +} +inline float4 operator*(const float4& a, float fact) +{ + float4 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; r.w = a.w*fact; return r; +} +inline float4 operator*(float fact, float4& a) +{ + return (a 
* fact); +} +inline float4& operator*=(float4& a, float fact) +{ + a = fact * a; + return a; +} +inline float4& operator+=(float4& a, const float4& b) +{ + a = a + b; + return a; +} + +inline float3 operator+(const float3& a, const float3& b) +{ + float3 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; return r; +} +inline float3 operator-(const float3& a, const float3& b) +{ + float3 r; r.x = a.x-b.x; r.y = a.y-b.y; r.z = a.z-b.z; return r; +} +static inline float bt3dGrid_dot(float3& a, float3& b) +{ + return a.x*b.x+a.y*b.y+a.z*b.z; +} +#define BT_GPU_dot(a,b) bt3dGrid_dot(a,b) + +static inline float bt3dGrid_dot4(float4& a, float4& b) +{ + return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w; +} +#define BT_GPU_dot4(a,b) bt3dGrid_dot4(a,b) + +static inline float3 bt3dGrid_cross(const float3& a, const float3& b) +{ + float3 r; r.x = a.y*b.z-a.z*b.y; r.y = -a.x*b.z+a.z*b.x; r.z = a.x*b.y-a.y*b.x; return r; +} +#define BT_GPU_cross(a,b) bt3dGrid_cross(a,b) + + +inline float3 operator*(const float3& a, float fact) +{ + float3 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; return r; +} + + +inline float3& operator+=(float3& a, const float3& b) +{ + a = a + b; + return a; +} +inline float3& operator-=(float3& a, const float3& b) +{ + a = a - b; + return a; +} +inline float3& operator*=(float3& a, float fact) +{ + a = a * fact; + return a; +} +inline float3 operator-(const float3& v) +{ + float3 r; r.x = -v.x; r.y = -v.y; r.z = -v.z; return r; +} + + +#define BT_GPU_FETCH(a, b) a[b] +#define BT_GPU_FETCH4(a, b) a[b] +#define BT_GPU_PREF(func) btGpu_##func +#define BT_GPU_SAFE_CALL(func) func +#define BT_GPU_Memset memset +#define BT_GPU_MemcpyToSymbol(a, b, c) memcpy(&a, b, c) +#define BT_GPU_BindTexture(a, b, c, d) +#define BT_GPU_UnbindTexture(a) + +static uint2 s_blockIdx, s_blockDim, s_threadIdx; +#define BT_GPU_blockIdx s_blockIdx +#define BT_GPU_blockDim s_blockDim +#define BT_GPU_threadIdx s_threadIdx +#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) 
{s_blockDim.x=numt;for(int nb=0;nb +#else +#include +#endif +#else +#ifdef USE_MINICL +#include +#else +#include +#ifdef _WIN32 +#include "CL/cl_gl.h" +#endif //_WIN32 +#endif +#endif //__APPLE__ + +#include +#include +#define oclCHECKERROR(a, b) if((a)!=(b)) { printf("OCL Error : %d\n", (a)); assert((a) == (b)); } + + +#endif //BT_OPENCL_INCLUDE_H + diff --git a/Extras/RigidBodyGpuPipeline/opencl/basic_initialize/btOpenCLUtils.cpp b/Extras/RigidBodyGpuPipeline/opencl/basic_initialize/btOpenCLUtils.cpp new file mode 100644 index 000000000..e278401e9 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/basic_initialize/btOpenCLUtils.cpp @@ -0,0 +1,731 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//original author: Roman Ponomarev +//cleanup by Erwin Coumans + +#include + +#include "btOpenCLUtils.h" +#include +#include + +#define BT_MAX_CL_DEVICES 16 //who needs 16 devices? 
+ +#ifdef _WIN32 +#include +#include + +#define btAssert assert +#endif + +//Set the preferred platform vendor using the OpenCL SDK +static char* spPlatformVendor = +#if defined(CL_PLATFORM_MINI_CL) +"MiniCL, SCEA"; +#elif defined(CL_PLATFORM_AMD) +"Advanced Micro Devices, Inc."; +#elif defined(CL_PLATFORM_NVIDIA) +"NVIDIA Corporation"; +#elif defined(CL_PLATFORM_INTEL) +"Intel(R) Corporation"; +#else +"Unknown Vendor"; +#endif + +#ifndef CL_PLATFORM_MINI_CL +#ifdef _WIN32 +#include "CL/cl_gl.h" +#endif //_WIN32 +#endif + +int btOpenCLUtils::getNumPlatforms(cl_int* pErrNum) +{ + cl_uint numPlatforms=0; + cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); + + if(ciErrNum != CL_SUCCESS) + { + if(pErrNum != NULL) + *pErrNum = ciErrNum; + } + return numPlatforms; +} + +const char* btOpenCLUtils::getSdkVendorName() +{ + return spPlatformVendor; +} + +cl_platform_id btOpenCLUtils::getPlatform(int platformIndex, cl_int* pErrNum) +{ + cl_platform_id platform = 0; + + cl_uint numPlatforms; + cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); + + if (platformIndex>=0 && platformIndex=0 && preferredDeviceIndex 0) + { + cl_platform_id* platforms = new cl_platform_id[numPlatforms]; + ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL); + if(ciErrNum != CL_SUCCESS) + { + if(pErrNum != NULL) *pErrNum = ciErrNum; + return NULL; + } + int i; + + + for ( i = 0; i < numPlatforms; ++i) + { + char pbuf[128]; + ciErrNum = clGetPlatformInfo( platforms[i], + CL_PLATFORM_VENDOR, + sizeof(pbuf), + pbuf, + NULL); + if(ciErrNum != CL_SUCCESS) + { + if(pErrNum != NULL) *pErrNum = ciErrNum; + return NULL; + } + + if (preferredPlatformIndex>=0 && i==preferredPlatformIndex) + { + cl_platform_id tmpPlatform = platforms[0]; + platforms[0] = platforms[i]; + platforms[i] = tmpPlatform; + break; + } else + { + if(!strcmp(pbuf, spPlatformVendor)) + { + cl_platform_id tmpPlatform = platforms[0]; + platforms[0] = platforms[i]; + platforms[i] = tmpPlatform; + break; + } + } + } + 
+ for (i = 0; i < numPlatforms; ++i) + { + cl_platform_id platform = platforms[i]; + assert(platform); + + retContext = btOpenCLUtils::createContextFromPlatform(platform,deviceType,pErrNum,pGLContext,pGLDC,preferredDeviceIndex); + + if (retContext) + { +// printf("OpenCL platform details:\n"); + btOpenCLPlatformInfo platformInfo; + + btOpenCLUtils::getPlatformInfo(platform, platformInfo); + + printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor); + printf(" CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName); + printf(" CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion); + + break; + } + } + + delete[] platforms; + } + return retContext; +} + + +////////////////////////////////////////////////////////////////////////////// +//! Gets the id of the nth device from the context +//! +//! @return the id or -1 when out of range +//! @param cxMainContext OpenCL context +//! @param device_idx index of the device of interest +////////////////////////////////////////////////////////////////////////////// +cl_device_id btOpenCLUtils::getDevice(cl_context cxMainContext, int deviceIndex) +{ + size_t szParmDataBytes; + cl_device_id* cdDevices; + + // get the list of devices associated with context + clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); + + if( szParmDataBytes / sizeof(cl_device_id) < deviceIndex ) { + return (cl_device_id)-1; + } + + cdDevices = (cl_device_id*) malloc(szParmDataBytes); + + clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); + + cl_device_id device = cdDevices[deviceIndex]; + free(cdDevices); + + return device; +} + +int btOpenCLUtils::getNumDevices(cl_context cxMainContext) +{ + size_t szParamDataBytes; + clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes); + int device_count = (int) szParamDataBytes/ sizeof(cl_device_id); + return device_count; +} + +void btOpenCLUtils::printDeviceInfo(cl_device_id device) +{ + 
btOpenCLDeviceInfo info; + getDeviceInfo(device,info); + + printf(" CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName); + printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor); + printf(" CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion); + + if( info.m_deviceType & CL_DEVICE_TYPE_CPU ) + printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU"); + if( info.m_deviceType & CL_DEVICE_TYPE_GPU ) + printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU"); + if( info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR ) + printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); + if( info.m_deviceType & CL_DEVICE_TYPE_DEFAULT ) + printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); + + printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits); + printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", info.m_workitemDims); + printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", info.m_workItemSize[0], info.m_workItemSize[1], info.m_workItemSize[2]); + printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", info.m_workgroupSize); + printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency); + printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits); + printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize/ (1024 * 1024))); + printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize/ (1024 * 1024))); + printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport== CL_TRUE ? "yes" : "no"); + printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? 
"local" : "global"); + printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024)); + printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024)); + if( info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) + printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE"); + if( info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE ) + printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE"); + + printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport); + + printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs); + printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs); + printf("\n CL_DEVICE_IMAGE "); + printf("\t\t\t2D_MAX_WIDTH\t %u\n", info.m_image2dMaxWidth); + printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", info.m_image2dMaxHeight); + printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", info.m_image3dMaxWidth); + printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", info.m_image3dMaxHeight); + printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", info.m_image3dMaxDepth); + if (info.m_deviceExtensions != 0) + printf("\n CL_DEVICE_EXTENSIONS:%s\n",info.m_deviceExtensions); + else + printf(" CL_DEVICE_EXTENSIONS: None\n"); + printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_\t"); + printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n", + info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong,info.m_vecWidthFloat, info.m_vecWidthDouble); + + +} + +void btOpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo& info) +{ + + // CL_DEVICE_NAME + clGetDeviceInfo(device, CL_DEVICE_NAME, BT_MAX_STRING_LENGTH, &info.m_deviceName, NULL); + + // CL_DEVICE_VENDOR + clGetDeviceInfo(device, CL_DEVICE_VENDOR, BT_MAX_STRING_LENGTH, &info.m_deviceVendor, NULL); + + // CL_DRIVER_VERSION + clGetDeviceInfo(device, CL_DRIVER_VERSION, BT_MAX_STRING_LENGTH, &info.m_driverVersion, NULL); + + // 
CL_DEVICE_INFO + clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info.m_deviceType, NULL); + + // CL_DEVICE_MAX_COMPUTE_UNITS + clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info.m_computeUnits), &info.m_computeUnits, NULL); + + // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(info.m_workitemDims), &info.m_workitemDims, NULL); + + // CL_DEVICE_MAX_WORK_ITEM_SIZES + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info.m_workItemSize), &info.m_workItemSize, NULL); + + // CL_DEVICE_MAX_WORK_GROUP_SIZE + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info.m_workgroupSize), &info.m_workgroupSize, NULL); + + // CL_DEVICE_MAX_CLOCK_FREQUENCY + clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info.m_clockFrequency), &info.m_clockFrequency, NULL); + + // CL_DEVICE_ADDRESS_BITS + clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info.m_addressBits), &info.m_addressBits, NULL); + + // CL_DEVICE_MAX_MEM_ALLOC_SIZE + clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info.m_maxMemAllocSize), &info.m_maxMemAllocSize, NULL); + + // CL_DEVICE_GLOBAL_MEM_SIZE + clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info.m_globalMemSize), &info.m_globalMemSize, NULL); + + // CL_DEVICE_ERROR_CORRECTION_SUPPORT + clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info.m_errorCorrectionSupport), &info.m_errorCorrectionSupport, NULL); + + // CL_DEVICE_LOCAL_MEM_TYPE + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info.m_localMemType), &info.m_localMemType, NULL); + + // CL_DEVICE_LOCAL_MEM_SIZE + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info.m_localMemSize), &info.m_localMemSize, NULL); + + // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE + clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info.m_constantBufferSize), &info.m_constantBufferSize, NULL); + + // CL_DEVICE_QUEUE_PROPERTIES + 
clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info.m_queueProperties), &info.m_queueProperties, NULL); + + // CL_DEVICE_IMAGE_SUPPORT + clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info.m_imageSupport), &info.m_imageSupport, NULL); + + // CL_DEVICE_MAX_READ_IMAGE_ARGS + clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info.m_maxReadImageArgs), &info.m_maxReadImageArgs, NULL); + + // CL_DEVICE_MAX_WRITE_IMAGE_ARGS + clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info.m_maxWriteImageArgs), &info.m_maxWriteImageArgs, NULL); + + // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH + clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info.m_image2dMaxWidth, NULL); + clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info.m_image2dMaxHeight, NULL); + clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info.m_image3dMaxWidth, NULL); + clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info.m_image3dMaxHeight, NULL); + clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info.m_image3dMaxDepth, NULL); + + // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines + clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, BT_MAX_STRING_LENGTH, &info.m_deviceExtensions, NULL); + + // CL_DEVICE_PREFERRED_VECTOR_WIDTH_ + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info.m_vecWidthChar, NULL); + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info.m_vecWidthShort, NULL); + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info.m_vecWidthInt, NULL); + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info.m_vecWidthLong, NULL); + clGetDeviceInfo(device, 
CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info.m_vecWidthFloat, NULL); + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info.m_vecWidthDouble, NULL); +} + +static const char* strip2(const char* name, const char* pattern) +{ + size_t const patlen = strlen(pattern); + size_t patcnt = 0; + const char * oriptr; + const char * patloc; + // find how many times the pattern occurs in the original string + for (oriptr = name; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen) + { + patcnt++; + } + return oriptr; +} + +cl_program btOpenCLUtils::compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSource, cl_int* pErrNum, const char* additionalMacros , const char* clFileNameForCaching) +{ + + cl_program m_cpProgram=0; + cl_int status; + + char binaryFileName[522]; + + if (clFileNameForCaching) + { + + char deviceName[256]; + char driverVersion[256]; + clGetDeviceInfo(device, CL_DEVICE_NAME, 256, &deviceName, NULL); + clGetDeviceInfo(device, CL_DRIVER_VERSION, 256, &driverVersion, NULL); + + + const char* strippedName = strip2(clFileNameForCaching,"\\"); + strippedName = strip2(strippedName,"/"); + + sprintf_s(binaryFileName,"cache/%s.%s.%s.bin",strippedName, deviceName,driverVersion ); + //printf("searching for %s\n", binaryFileName); + + bool fileUpToDate = false; + bool binaryFileValid=false; + + FILETIME modtimeBinary; + +#ifdef _WIN32 + CreateDirectory("cache",0); + { + + HANDLE binaryFileHandle = CreateFile(binaryFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0); + if (binaryFileHandle ==INVALID_HANDLE_VALUE) + { + DWORD errorCode; + errorCode = GetLastError(); + switch (errorCode) + { + case ERROR_FILE_NOT_FOUND: + { + printf("\nCached file not found %s\n", binaryFileName); + break; + } + case ERROR_PATH_NOT_FOUND: + { + printf("\nCached file path not found %s\n", binaryFileName); + break; + } + default: + { + printf("\nFailed reading cached file with errorCode 
= %d\n", errorCode); + } + } + } else + { + if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary)==0) + { + DWORD errorCode; + errorCode = GetLastError(); + printf("\nGetFileTime errorCode = %d\n", errorCode); + } else + { + binaryFileValid = true; + } + CloseHandle(binaryFileHandle); + } + + if (binaryFileValid) + { + HANDLE srcFileHandle = CreateFile(clFileNameForCaching,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0); + if (srcFileHandle!=INVALID_HANDLE_VALUE) + { + FILETIME modtimeSrc; + if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc)==0) + { + DWORD errorCode; + errorCode = GetLastError(); + printf("\nGetFileTime errorCode = %d\n", errorCode); + } + if ( ( modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime) + ||(( modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime)&&(modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime))) + { + fileUpToDate=true; + } else + { + printf("\nCached binary file out-of-date (%s)\n",binaryFileName); + } + CloseHandle(srcFileHandle); + } + else + { +#ifdef _DEBUG + DWORD errorCode; + errorCode = GetLastError(); + switch (errorCode) + { + case ERROR_FILE_NOT_FOUND: + { + printf("\nSrc file not found %s\n", clFileNameForCaching); + break; + } + case ERROR_PATH_NOT_FOUND: + { + printf("\nSrc path not found %s\n", clFileNameForCaching); + break; + } + default: + { + printf("\nnSrc file reading errorCode = %d\n", errorCode); + } + } + + //we should make sure the src file exists so we can verify the timestamp with binary + assert(0); +#else + //if we cannot find the source, assume it is OK in release builds + fileUpToDate = true; +#endif + } + } + + + } + + if( fileUpToDate) + { + FILE* file = fopen(binaryFileName, "rb"); + if (file) + { + fseek( file, 0L, SEEK_END ); + size_t binarySize = ftell( file ); + rewind( file ); + char* binary = new char[binarySize]; + fread( binary, sizeof(char), binarySize, file ); + fclose( file ); + + m_cpProgram = clCreateProgramWithBinary( clContext, 1,&device, 
&binarySize, (const unsigned char**)&binary, 0, &status ); + btAssert( status == CL_SUCCESS ); + status = clBuildProgram( m_cpProgram, 1, &device, additionalMacros, 0, 0 ); + btAssert( status == CL_SUCCESS ); + + if( status != CL_SUCCESS ) + { + char *build_log; + size_t ret_val_size; + clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); + build_log = new char[ret_val_size+1]; + clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); + build_log[ret_val_size] = '\0'; + printf("%s\n", build_log); + delete build_log; + btAssert(0); + m_cpProgram = 0; + } + delete[] binary; + } + } +#endif //_WIN32 + + } + + if (!m_cpProgram) + { + cl_kernel kernel; + cl_int localErrNum; + size_t program_length = strlen(kernelSource); + + m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum); + if (localErrNum!= CL_SUCCESS) + { + if (pErrNum) + *pErrNum = localErrNum; + return 0; + } + + // Build the program with 'mad' Optimization option + + + #ifdef MAC + char* flags = "-cl-mad-enable -DMAC -DGUID_ARG"; + #else + //const char* flags = "-DGUID_ARG= -fno-alias"; + const char* flags = "-DGUID_ARG= "; + #endif + + char* compileFlags = new char[strlen(additionalMacros) + strlen(flags) + 5]; + sprintf(compileFlags, "%s %s", flags, additionalMacros); + localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL); + if (localErrNum!= CL_SUCCESS) + { + char *build_log; + size_t ret_val_size; + clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); + build_log = new char[ret_val_size+1]; + clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); + + // to be carefully, terminate with \0 + // there's no information in the reference whether the string is 0 terminated or not + build_log[ret_val_size] = '\0'; + + + printf("Error in clBuildProgram, Line %u in file %s, 
Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log); + delete[] build_log; + if (pErrNum) + *pErrNum = localErrNum; + return 0; + } + + if( clFileNameForCaching ) + { // write to binary + + cl_uint numAssociatedDevices; + status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0 ); + btAssert( status == CL_SUCCESS ); + if (numAssociatedDevices==1) + { + + size_t binarySize; + status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 ); + btAssert( status == CL_SUCCESS ); + + char* binary = new char[binarySize]; + + status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 ); + btAssert( status == CL_SUCCESS ); + + { + FILE* file = fopen(binaryFileName, "wb"); + if (file) + { + fwrite( binary, sizeof(char), binarySize, file ); + fclose( file ); + } else + { + printf("cannot write file %s\n", binaryFileName); + } + } + + delete [] binary; + } + } + delete [] compileFlags; + } + + return m_cpProgram; +} + + +cl_kernel btOpenCLUtils::compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros ) +{ + printf("compiling kernel %s ",kernelName); + cl_kernel kernel; + cl_int localErrNum; + size_t program_length = strlen(kernelSource); + + + cl_program m_cpProgram = prog; + if (!m_cpProgram) + { + m_cpProgram = compileCLProgramFromString(clContext,device,kernelSource,pErrNum, additionalMacros); + } + + + // Create the kernel + kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum); + if (localErrNum != CL_SUCCESS) + { + printf("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName); + if (pErrNum) + *pErrNum = localErrNum; + return 0; + } + + if (!prog && m_cpProgram) + { + clReleaseProgram(m_cpProgram); + } + printf("ready. 
\n"); + + + if (pErrNum) + *pErrNum = CL_SUCCESS; + return kernel; + +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/basic_initialize/btOpenCLUtils.h b/Extras/RigidBodyGpuPipeline/opencl/basic_initialize/btOpenCLUtils.h new file mode 100644 index 000000000..4e41b415b --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/basic_initialize/btOpenCLUtils.h @@ -0,0 +1,104 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +//original author: Roman Ponomarev +//cleanup by Erwin Coumans + +#ifndef BT_OPENCL_UTILS_H +#define BT_OPENCL_UTILS_H + +#include "btOpenCLInclude.h" + + +#define BT_MAX_STRING_LENGTH 1024 + +struct btOpenCLDeviceInfo +{ + char m_deviceName[BT_MAX_STRING_LENGTH]; + char m_deviceVendor[BT_MAX_STRING_LENGTH]; + char m_driverVersion[BT_MAX_STRING_LENGTH]; + char m_deviceExtensions[BT_MAX_STRING_LENGTH]; + + cl_device_type m_deviceType; + cl_uint m_computeUnits; + size_t m_workitemDims; + size_t m_workItemSize[3]; + size_t m_image2dMaxWidth; + size_t m_image2dMaxHeight; + size_t m_image3dMaxWidth; + size_t m_image3dMaxHeight; + size_t m_image3dMaxDepth; + size_t m_workgroupSize; + cl_uint m_clockFrequency; + cl_ulong m_constantBufferSize; + cl_ulong m_localMemSize; + cl_ulong m_globalMemSize; + cl_bool m_errorCorrectionSupport; + cl_device_local_mem_type m_localMemType; + cl_uint m_maxReadImageArgs; + cl_uint m_maxWriteImageArgs; + + + + cl_uint m_addressBits; + cl_ulong m_maxMemAllocSize; + cl_command_queue_properties m_queueProperties; + cl_bool m_imageSupport; + cl_uint m_vecWidthChar; + cl_uint m_vecWidthShort; + cl_uint m_vecWidthInt; + cl_uint m_vecWidthLong; + cl_uint m_vecWidthFloat; + cl_uint m_vecWidthDouble; + +}; + +struct btOpenCLPlatformInfo +{ + char m_platformVendor[BT_MAX_STRING_LENGTH]; + char m_platformName[BT_MAX_STRING_LENGTH]; + char m_platformVersion[BT_MAX_STRING_LENGTH]; +}; + +class btOpenCLUtils +{ +public: + + /// CL Context optionally takes a GL context. This is a generic type because we don't really want this code + /// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise. 
+ static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0, int preferredDeviceIndex = -1, int preferredPlatformIndex= - 1); + + static int getNumDevices(cl_context cxMainContext); + static cl_device_id getDevice(cl_context cxMainContext, int nr); + static void getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo& info); + static void printDeviceInfo(cl_device_id device); + + static cl_kernel compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum=0, cl_program prog=0,const char* additionalMacros = "" ); + + //optional + static cl_program compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum=0,const char* additionalMacros = "" , const char* srcFileNameForCaching=0); + + //the following optional APIs provide access using specific platform information + static int getNumPlatforms(cl_int* pErrNum=0); + ///get the nr'th platform, where nr is in the range [0..getNumPlatforms) + static cl_platform_id getPlatform(int nr, cl_int* pErrNum=0); + static void getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo& platformInfo); + static const char* getSdkVendorName(); + static cl_context createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0,int preferredDeviceIndex = -1, int preferredPlatformIndex= -1); +}; + + + +#endif // BT_OPENCL_UTILS_H diff --git a/Extras/RigidBodyGpuPipeline/opencl/basic_initialize/main.cpp b/Extras/RigidBodyGpuPipeline/opencl/basic_initialize/main.cpp new file mode 100644 index 000000000..2890d7d19 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/basic_initialize/main.cpp @@ -0,0 +1,92 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2011 Advanced Micro Devices, Inc. 
http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +///original author: Erwin Coumans + +#include "btOpenCLUtils.h" +#include + +cl_context g_cxMainContext; +cl_command_queue g_cqCommandQue; + + + +int main(int argc, char* argv[]) +{ + int ciErrNum = 0; + + cl_device_type deviceType = CL_DEVICE_TYPE_ALL; + const char* vendorSDK = btOpenCLUtils::getSdkVendorName(); + + printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK); + int numPlatforms = btOpenCLUtils::getNumPlatforms(); + printf("Num Platforms = %d\n", numPlatforms); + + for (int i=0;i=0) + { + btAABBCL minAabb = plocalShapeAABB[shapeIndex*2]; + btAABBCL maxAabb = plocalShapeAABB[shapeIndex*2+1]; + + float4 halfExtents = ((float4)(maxAabb.fx - minAabb.fx,maxAabb.fy - minAabb.fy,maxAabb.fz - minAabb.fz,0.f))*0.5f; + + Matrix3x3 abs_b = qtGetRotationMatrix(orientation); + float4 extent = (float4) ( dot(abs_b.m_row[0],halfExtents),dot(abs_b.m_row[1],halfExtents),dot(abs_b.m_row[2],halfExtents),0.f); + + + pAABB[nodeID*2].fx = position.x-extent.x; + pAABB[nodeID*2].fy = position.y-extent.y; + pAABB[nodeID*2].fz = position.z-extent.z; + pAABB[nodeID*2].uw = nodeID; + + pAABB[nodeID*2+1].fx = position.x+extent.x; + 
pAABB[nodeID*2+1].fy = position.y+extent.y; + pAABB[nodeID*2+1].fz = position.z+extent.z; + pAABB[nodeID*2+1].uw = nodeID; + } + } +} + + +__kernel void + broadphaseColorKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global int2* pOverlappingPairs, const int numOverlap) +{ + int nodeID = get_global_id(0); + if( nodeID < numOverlap ) + { + int2 pair = pOverlappingPairs[nodeID]; + float4 red = (float4)(1.f,0.4f,0.4f,1.f); + + g_vertexBuffer[pair.x + startOffset/4+numNodes+numNodes] = red; + g_vertexBuffer[pair.y + startOffset/4+numNodes+numNodes] = red; + } +} + + + +__kernel void + broadphaseKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer) +{ + int nodeID = get_global_id(0); + +// float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254); + + if( nodeID < numNodes ) + { + float4 position = g_vertexBuffer[nodeID + startOffset/4]; + //float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes]; + float4 color = g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes]; + + float4 red = (float4)(1.f,0.f,0.f,0.f); + float4 green = (float4)(0.f,1.f,0.f,0.f); + float4 blue = (float4)(0.f,0.f,1.f,0.f); + float overlap=0; + int equal = 0; + + g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = green; + + for (int i=0;i0.f) + g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes]=red*overlap; + else + g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes]=green; + } + } +} + +); \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/btGridBroadphaseCL.cpp b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/btGridBroadphaseCL.cpp new file mode 100644 index 000000000..9e0ea62cc --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/btGridBroadphaseCL.cpp @@ -0,0 +1,231 @@ + +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. 
+In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Roman Ponomarev, Erwin Coumans + +#ifdef RELEASE_ME +#define COMPUTE_AABB_KERNEL_PATH "computeAabbKernelOCL.cl" +#else +#define COMPUTE_AABB_KERNEL_PATH "..\\..\\opencl\\broadphase_benchmark\\computeAabbKernelOCL" +#endif + + +#include "btGridBroadphaseCl.h" +#include "LinearMath/btQuickprof.h" +#include "Adl/Adl.h" +#include "AdlPrimitives/Math/Math.h" + +#include "Adl/AdlKernel.h" +#include "../basic_initialize/btOpenCLUtils.h" +#define MSTRINGIFY(A) #A +static const char* spComputeAabbSource= +#include "computeAabbKernelOCL.cl" + +struct btTmpAabb +{ + float minfx; + float minfy; + float minfz; + unsigned int index0; + float maxfx; + float maxfy; + float maxfz; + unsigned int index1; +} ; + + + + +btGridBroadphaseCl::btGridBroadphaseCl( btOverlappingPairCache* overlappingPairCache, + const btVector3& cellSize, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy, + btScalar maxSmallProxySize, + int maxSmallProxiesPerCell, + cl_context context, + cl_device_id device, + cl_command_queue queue, + adl::DeviceCL* deviceCL) +:bt3dGridBroadphaseOCL(overlappingPairCache,cellSize, + gridSizeX, gridSizeY, gridSizeZ, + maxSmallProxies, maxLargeProxies, 
maxPairsPerSmallProxy, + maxSmallProxySize,maxSmallProxiesPerCell, + context,device,queue,deviceCL) +{ + m_computeAabbKernel = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"computeAabb","",spComputeAabbSource); + + m_countOverlappingPairs = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"countOverlappingpairs","",spComputeAabbSource); + + m_squeezePairCaches = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"squeezePairCaches","",spComputeAabbSource); + + m_aabbConstBuffer = new adl::Buffer(m_deviceCL,1,adl::BufferBase::BUFFER_CONST); + + size_t memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)*2; + cl_int ciErrNum=0; + m_dAllOverlappingPairs = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + + memset(m_hAllOverlappingPairs, 0x00, sizeof(MyUint2)*m_maxHandles * m_maxPairsPerBody); + copyArrayToDevice(m_dAllOverlappingPairs, m_hAllOverlappingPairs, m_maxHandles * m_maxPairsPerBody * sizeof(MyUint2)); + + + + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + + +} + +btGridBroadphaseCl::~btGridBroadphaseCl() +{ + clReleaseMemObject(m_dAllOverlappingPairs); + + delete m_aabbConstBuffer; + +} + + + +void btGridBroadphaseCl::prepareAABB(float* positions, int numObjects) +{ + return; +#if 0 +bt3dGridBroadphaseOCL::prepareAABB(); +#else + BT_PROFILE("prepareAABB"); + bt3DGrid3F1U* pBB = m_hAABB; + + int new_largest_index = numObjects; + unsigned int num_small = numObjects; + m_LastHandleIndex = new_largest_index; + new_largest_index = -1; + unsigned int num_large = 0; + m_LastLargeHandleIndex = new_largest_index; + // paranoid checks + //btAssert(num_small == m_numHandles); + //btAssert(num_large == m_numLargeHandles); + + //copyArrayFromDevice( m_hAABB, m_dAABB, sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles)); + //clFinish(m_cqCommandQue); +#endif + +} +void btGridBroadphaseCl::calcHashAABB() +{ + bt3dGridBroadphaseOCL::calcHashAABB(); +} + + +void btGridBroadphaseCl::calculateOverlappingPairs(float* positions, int 
numObjects) +{ + btDispatcher* dispatcher=0; + + // update constants + { + BT_PROFILE("setParameters"); + setParameters(&m_params); + } + + // prepare AABB array + { + BT_PROFILE("prepareAABB"); + prepareAABB(positions, numObjects); + } + // calculate hash + { + BT_PROFILE("calcHashAABB"); + calcHashAABB(); + } + + { + BT_PROFILE("sortHash"); + // sort bodies based on hash + sortHash(); + } + + // find start of each cell + { + BT_PROFILE("findCellStart"); + findCellStart(); + } + + { + BT_PROFILE("findOverlappingPairs"); + // findOverlappingPairs (small/small) + findOverlappingPairs(); + } + + // add pairs to CPU cache + { + BT_PROFILE("computePairCacheChanges"); +#if 0 + computePairCacheChanges(); +#else + int ciErrNum=0; + + ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 0, sizeof(int), (void*)&numObjects); + ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff); + ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr); + ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged); + ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAABB); + + + size_t localWorkSize=64; + size_t numWorkItems = localWorkSize*((numObjects+ (localWorkSize)) / localWorkSize); + + + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_countOverlappingPairs->m_kernel, 1, NULL, &numWorkItems, &localWorkSize, 0,0,0 ); +oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clFlush(m_cqCommandQue); +#endif + + + } + { + BT_PROFILE("scanOverlappingPairBuff"); + scanOverlappingPairBuff(false); + } + { + BT_PROFILE("squeezeOverlappingPairBuff"); +//#define FORCE_CPU +#ifdef FORCE_CPU + bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff(); + copyArrayToDevice(m_dPairsChangedXY, m_hPairsChangedXY, sizeof( MyUint2) * m_numPrefixSum); //gSum +#else + 
//squeezeOverlappingPairBuff(); + int ciErrNum = 0; + ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 0, sizeof(int), (void*)&numObjects); + ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff); + ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr); + ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged); + ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAllOverlappingPairs); + ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 5, sizeof(cl_mem),(void*)&m_dAABB); + + size_t workGroupSize = 64; + size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize); + + + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_squeezePairCaches->m_kernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0 ); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + +// copyArrayFromDevice(m_hAllOverlappingPairs, m_dAllOverlappingPairs, sizeof(unsigned int) * m_numPrefixSum*2); //gSum +// clFinish(m_cqCommandQue); +#endif + + } + + + return; +} + diff --git a/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/btGridBroadphaseCL.h b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/btGridBroadphaseCL.h new file mode 100644 index 000000000..7f064488d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/btGridBroadphaseCL.h @@ -0,0 +1,73 @@ + +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Roman Ponomarev, Erwin Coumans + +#ifndef GRID_BROADPHASE_CL_H +#define GRID_BROADPHASE_CL_H + +#include "../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h" + +#include "Adl/Adl.h" +#include "Adl/AdlKernel.h" + + +struct MyAabbConstData +{ + int bla; + int numElem; +}; + + + +class btGridBroadphaseCl : public bt3dGridBroadphaseOCL +{ +protected: + + adl::Kernel* m_computeAabbKernel; + adl::Kernel* m_countOverlappingPairs; + adl::Kernel* m_squeezePairCaches; + + + adl::Buffer* m_aabbConstBuffer; + + + public: + + cl_mem m_dAllOverlappingPairs; + + + btGridBroadphaseCl( btOverlappingPairCache* overlappingPairCache, + const btVector3& cellSize, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy, + btScalar maxSmallProxySize, + int maxSmallProxiesPerCell = 4, + cl_context context = NULL, + cl_device_id device = NULL, + cl_command_queue queue = NULL, + adl::DeviceCL* deviceCL=0 + ); + + virtual void prepareAABB(float* positions, int numObjects); + virtual void calcHashAABB(); + + void calculateOverlappingPairs(float* positions, int numObjects); + + virtual ~btGridBroadphaseCl(); + +}; + +#endif //GRID_BROADPHASE_CL_H + diff --git a/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/computeAabbKernelOCL.cl b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/computeAabbKernelOCL.cl new file mode 100644 index 000000000..3cf7550c0 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/computeAabbKernelOCL.cl @@ -0,0 
+1,112 @@ +MSTRINGIFY( + +typedef struct +{ + int bla; + int numElem; +} MyAabbConstDataCL ; + +typedef struct +{ + float minfx; + float minfy; + float minfz; + unsigned int index0; + float maxfx; + float maxfy; + float maxfz; + unsigned int index1; +} btAabbCL; + + +__kernel void computeAabb( __global btAabbCL* aabbs,__global float4* positions, MyAabbConstDataCL cb) +{ + int nodeID = get_global_id(0); + + if( nodeID < cb.numElem ) + { + aabbs[nodeID].minfx = positions[nodeID].x -1.f; + aabbs[nodeID].minfy = positions[nodeID].y -1.f; + aabbs[nodeID].minfz = positions[nodeID].z -1.f; + aabbs[nodeID].index0 = nodeID; + aabbs[nodeID].maxfx = positions[nodeID].x +1.f; + aabbs[nodeID].maxfy = positions[nodeID].y +1.f; + aabbs[nodeID].maxfz = positions[nodeID].z +1.f; + aabbs[nodeID].index1 = nodeID; + } +} + + +__kernel void countOverlappingpairs( int numObjects, + __global int* pPairBuff, + __global int2* pPairBuffStartCurr, + __global int* pPairScan, + __global float4* pAABB ) +{ + int index = get_global_id(0); + if(index >= numObjects) + { + return; + } + float4 bbMin = pAABB[index * 2]; + int handleIndex = as_int(bbMin.w); + int2 start_curr = pPairBuffStartCurr[handleIndex]; + int start = start_curr.x; + int curr = start_curr.y; + __global int *pInp = pPairBuff + start; + int num_changes = 0; + for(int k = 0; k < curr; k++, pInp++) + { + if(((*pInp) & 0x60000000))//either new or existing pairs (ignore old non-overlapping pairs) + { + num_changes++; + } + } + pPairScan[index+1] = num_changes; +} + + +__kernel void squeezePairCaches( int numObjects, + __global int* pPairBuff, + __global int2* pPairBuffStartCurr, + __global int* pPairScan, + __global int2* pPairOut, + __global float4* pAABB ) +{ + int index = get_global_id(0); + if(index >= numObjects) + { + return; + } + float4 bbMin = pAABB[index * 2]; + int handleIndex = as_int(bbMin.w); + int2 start_curr = pPairBuffStartCurr[handleIndex]; + int start = start_curr.x; + int curr = start_curr.y; + __global int* pInp = 
pPairBuff + start; + __global int2* pOut = pPairOut + pPairScan[index+1]; + __global int* pOut2 = pInp; + int num = 0; + for(int k = 0; k < curr; k++, pInp++) + { + if(((*pInp) & 0x60000000)) + { + int2 newpair; + newpair.x = handleIndex; + newpair.y = (*pInp) & (~0x60000000); + *pOut = newpair; + pOut++; + } + if((*pInp) & 0x60000000) + { + *pOut2 = (*pInp) & (~0x60000000); + pOut2++; + num++; + } + } + int2 newStartCurr; + newStartCurr.x = start; + newStartCurr.y = num; + pPairBuffStartCurr[handleIndex] = newStartCurr; +} +); \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/findPairsOpenCL.cpp b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/findPairsOpenCL.cpp new file mode 100644 index 000000000..d90e37054 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/findPairsOpenCL.cpp @@ -0,0 +1,204 @@ + +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Roman Ponomarev, Erwin Coumans + +#include "findPairsOpenCL.h" +#include "../basic_initialize/btOpenCLUtils.h" + +#define MSTRINGIFY(A) #A +static char* broadphaseKernelString = +#include "broadphaseKernel.cl" + +#define GRID_BROADPHASE_PATH "..\\..\\opencl\\broadphase_benchmark\\broadphaseKernel.cl" + + + + +void initFindPairs(btFindPairsIO& fpio,cl_context cxMainContext, cl_device_id device, cl_command_queue commandQueue, int maxHandles, int maxPairsPerBody) +{ + + //m_proxies.push_back( proxy ); + + fpio.m_mainContext = cxMainContext; + fpio.m_cqCommandQue = commandQueue; + fpio.m_device = device; + cl_int pErrNum; + cl_program prog = btOpenCLUtils::compileCLProgramFromString(cxMainContext, device, broadphaseKernelString, &pErrNum ,"",GRID_BROADPHASE_PATH); + + fpio.m_broadphaseBruteForceKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "broadphaseKernel" ,&pErrNum,prog); + fpio.m_initializeGpuAabbsKernelSimple = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "initializeGpuAabbsSimple" ,&pErrNum,prog); + fpio.m_initializeGpuAabbsKernelFull = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "initializeGpuAabbsFull" ,&pErrNum,prog); + + fpio.m_broadphaseColorKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "broadphaseColorKernel" ,&pErrNum,prog); + + fpio.m_setupBodiesKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "setupBodiesKernel" ,&pErrNum,prog); + fpio.m_copyVelocitiesKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "copyVelocitiesKernel" ,&pErrNum,prog); + + + +} + +void findPairsOpenCLBruteForce(btFindPairsIO& fpio) +{ + + int ciErrNum = 0; + + int numObjects = fpio.m_numObjects; + int offset = fpio.m_positionOffset; + + ciErrNum = 
clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 0, sizeof(int), &offset); + ciErrNum = clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 1, sizeof(int), &numObjects); + ciErrNum = clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer); + + size_t numWorkItems = numObjects;///workGroupSize*((NUM_OBJECTS + (workGroupSize)) / workGroupSize); + size_t workGroupSize = 64; + ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_broadphaseBruteForceKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); +} + +void setupGpuAabbsFull(btFindPairsIO& fpio, cl_mem bodies) +{ + + int ciErrNum = 0; + + int numObjects = fpio.m_numObjects; + int offset = fpio.m_positionOffset; + + ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 0, sizeof(int), &offset); + ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 1, sizeof(int), &numObjects); + ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer); + ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 3, sizeof(cl_mem), (void*)&bodies); + ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 4, sizeof(cl_mem), (void*)&fpio.m_dlocalShapeAABB); + ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 5, sizeof(cl_mem), (void*)&fpio.m_dAABB); + size_t workGroupSize = 64; + size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize); + + ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_initializeGpuAabbsKernelFull, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); +} + +void setupGpuAabbsSimple(btFindPairsIO& fpio) +{ + + int ciErrNum = 0; + + int numObjects = fpio.m_numObjects; + int offset = fpio.m_positionOffset; + + ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 0, sizeof(int), &offset); + ciErrNum = 
clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 1, sizeof(int), &numObjects); + ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer); + ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 3, sizeof(cl_mem), (void*)&fpio.m_dAABB); + size_t workGroupSize = 64; + size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize); + + ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_initializeGpuAabbsKernelSimple, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); +} + + +void setupBodies(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias) +{ + int ciErrNum = 0; + + int numObjects = fpio.m_numObjects; + int offset = fpio.m_positionOffset; + + ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 0, sizeof(int), &offset); + ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 1, sizeof(int), &fpio.m_numObjects); + ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer); + + ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 3, sizeof(cl_mem), (void*)&linVelMem); + ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 4, sizeof(cl_mem), (void*)&angVelMem); + ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 5, sizeof(cl_mem), (void*)&bodies); + ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 6, sizeof(cl_mem), (void*)&bodyInertias); + + if (numObjects) + { + size_t workGroupSize = 64; + size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize); + + ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_setupBodiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + +} + + +void copyBodyVelocities(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias) +{ + int ciErrNum = 0; + + int numObjects = fpio.m_numObjects; + int 
offset = fpio.m_positionOffset; + + ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 0, sizeof(int), &offset); + ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 1, sizeof(int), &fpio.m_numObjects); + ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer); + + ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 3, sizeof(cl_mem), (void*)&linVelMem); + ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 4, sizeof(cl_mem), (void*)&angVelMem); + ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 5, sizeof(cl_mem), (void*)&bodies); + ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 6, sizeof(cl_mem), (void*)&bodyInertias); + + if (numObjects) + { + size_t workGroupSize = 64; + size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize); + + ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_copyVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + +} + +void colorPairsOpenCL(btFindPairsIO& fpio) +{ + int ciErrNum = 0; + + int numObjects = fpio.m_numObjects; + int offset = fpio.m_positionOffset; + + ciErrNum = clSetKernelArg(fpio.m_broadphaseColorKernel, 0, sizeof(int), &offset); + ciErrNum = clSetKernelArg(fpio.m_broadphaseColorKernel, 1, sizeof(int), &fpio.m_numObjects); + ciErrNum = clSetKernelArg(fpio.m_broadphaseColorKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer); + ciErrNum = clSetKernelArg(fpio.m_broadphaseColorKernel, 3, sizeof(cl_mem), (void*)&fpio.m_dAllOverlappingPairs); + ciErrNum = clSetKernelArg(fpio.m_broadphaseColorKernel, 4, sizeof(int), &fpio.m_numOverlap); + + + if (fpio.m_numOverlap) + { + size_t workGroupSize = 64; + size_t numWorkItems = workGroupSize*((fpio.m_numOverlap+ (workGroupSize)) / workGroupSize); + + ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_broadphaseColorKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, 
CL_SUCCESS); + } +} + + + +void releaseFindPairs(btFindPairsIO& fpio) +{ + clReleaseKernel(fpio.m_initializeGpuAabbsKernelSimple); + clReleaseKernel(fpio.m_initializeGpuAabbsKernelFull); + clReleaseKernel(fpio.m_broadphaseColorKernel); + clReleaseKernel(fpio.m_broadphaseBruteForceKernel); + clReleaseKernel(fpio.m_setupBodiesKernel); + clReleaseKernel(fpio.m_copyVelocitiesKernel); + + +} + diff --git a/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/findPairsOpenCL.h b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/findPairsOpenCL.h new file mode 100644 index 000000000..771543685 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/findPairsOpenCL.h @@ -0,0 +1,90 @@ + +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Roman Ponomarev, Erwin Coumans + +#ifndef FIND_PAIRS_H +#define FIND_PAIRS_H + +#include "../basic_initialize/btOpenCLInclude.h" + +struct btKernelInfo +{ + int m_Id; + cl_kernel m_kernel; + char* m_name; + int m_workgroupSize; +}; + + + +struct btFindPairsIO +{ + int m_numObjects; + + cl_mem m_clObjectsBuffer; //for memory layout details see main.cpp (todo, make it flexible) + int m_positionOffset;//offset in m_clObjectsBuffer where position array starts + + cl_command_queue m_cqCommandQue; + cl_kernel m_initializeGpuAabbsKernelSimple; + cl_kernel m_initializeGpuAabbsKernelFull; + cl_kernel m_broadphaseColorKernel; + cl_kernel m_broadphaseBruteForceKernel; + + cl_kernel m_setupBodiesKernel; + cl_kernel m_copyVelocitiesKernel; + + cl_context m_mainContext; + cl_device_id m_device; + + cl_kernel m_calcHashAabbKernel; + cl_kernel m_clearCellStartKernel; + cl_kernel m_findCellStartKernel; + cl_kernel m_findOverlappingPairsKernel; + cl_kernel m_computePairChangeKernel; + cl_kernel m_squeezePairBuffKernel; + + + cl_mem m_dAllOverlappingPairs; + int m_numOverlap; + + cl_mem m_dBpParams; + cl_mem m_dBodiesHash; + cl_mem m_dCellStart; + cl_mem m_dPairBuff; + cl_mem m_dPairBuffStartCurr; + cl_mem m_dlocalShapeAABB; + cl_mem m_dAABB; + cl_mem m_dPairScan; + cl_mem m_dPairOut; +}; + + +void initFindPairs(btFindPairsIO& fpio,cl_context cxMainContext, cl_device_id device, cl_command_queue commandQueue, int maxHandles,int maxPairsPerBody = 16); + +void findPairsOpenCLBruteForce(btFindPairsIO& fpio); + +void setupGpuAabbsSimple(btFindPairsIO& fpio); + +void setupGpuAabbsFull(btFindPairsIO& fpio, cl_mem bodies); + + +void colorPairsOpenCL(btFindPairsIO& fpio); + +void setupBodies(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias); +void copyBodyVelocities(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias); + +void releaseFindPairs(btFindPairsIO& fpio); + +#endif 
//FIND_PAIRS_H diff --git a/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/integrateKernel.cl b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/integrateKernel.cl new file mode 100644 index 000000000..87d2b2569 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/integrateKernel.cl @@ -0,0 +1,116 @@ +MSTRINGIFY( + +float4 quatMult(float4 q1, float4 q2) +{ + float4 q; + q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y; + q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z; + q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x; + q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; + return q; +} + +float4 quatNorm(float4 q) +{ + float len = native_sqrt(dot(q, q)); + if(len > 0.f) + { + q *= 1.f / len; + } + else + { + q.x = q.y = q.z = 0.f; + q.w = 1.f; + } + return q; +} + + + + + +__kernel void + integrateTransformsKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, + __global float4 *linVel, + __global float4 *pAngVel, + __global float* pBodyTimes) +{ + int nodeID = get_global_id(0); + + + + float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f); + float mAmplitude = 66.f; + float timeStep = 0.0166666f; + + if( nodeID < numNodes ) + { + + //g_vertexBuffer[nodeID + startOffset/4+numNodes] += pAngVel[nodeID]; + if (1) + { + float4 axis; + //add some hardcoded angular damping + pAngVel[nodeID].x *= 0.99f; + pAngVel[nodeID].y *= 0.99f; + pAngVel[nodeID].z *= 0.99f; + + float4 angvel = pAngVel[nodeID]; + float fAngle = native_sqrt(dot(angvel, angvel)); + //limit the angular motion + if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD) + { + fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep; + } + if(fAngle < 0.001f) + { + // use Taylor's expansions of sync function + axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle); + } + else + { + // sync(fAngle) = sin(c*fAngle)/t + axis = angvel * ( native_sin(0.5f * 
fAngle * timeStep) / fAngle); + } + float4 dorn = axis; + dorn.w = native_cos(fAngle * timeStep * 0.5f); + float4 orn0 = g_vertexBuffer[nodeID + startOffset/4+numNodes]; + float4 predictedOrn = quatMult(dorn, orn0); + predictedOrn = quatNorm(predictedOrn); + g_vertexBuffer[nodeID + startOffset/4+numNodes]=predictedOrn; + } + + //linear velocity + g_vertexBuffer[nodeID + startOffset/4] += linVel[nodeID] * timeStep; + + } +} + + +__kernel void + sineWaveKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, + __global float4 *linVel, + __global float4 *pAngVel, + __global float* pBodyTimes) +{ + int nodeID = get_global_id(0); + float timeStepPos = 0.000166666; + + float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f); + float mAmplitude = 166.f; + + + if( nodeID < numNodes ) + { + pBodyTimes[nodeID] += timeStepPos; + float4 position = g_vertexBuffer[nodeID + startOffset/4]; + position.x = native_cos(pBodyTimes[nodeID]*2.17f)*mAmplitude + native_sin(pBodyTimes[nodeID])*mAmplitude*0.5f; + position.y = native_cos(pBodyTimes[nodeID]*1.38f)*mAmplitude + native_sin(pBodyTimes[nodeID]*mAmplitude); + position.z = native_cos(pBodyTimes[nodeID]*2.17f)*mAmplitude + native_sin(pBodyTimes[nodeID]*0.777f)*mAmplitude; + g_vertexBuffer[nodeID + startOffset/4] = position; + } +} + + + +); \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/main.cpp b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/main.cpp new file mode 100644 index 000000000..3e52c1976 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/main.cpp @@ -0,0 +1,1565 @@ + +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +//starts crashing when more than 32700 objects on my Geforce 260, unless _USE_SUB_DATA is defined (still unstable though) +//runs fine with fewer objects + +#define NUM_OBJECTS_X 42 +//327 +#define NUM_OBJECTS_Y 42 +#define NUM_OBJECTS_Z 42 +//#define NUM_OBJECTS_Z 20 + +//#define _USE_SUB_DATA + +//#define NUM_OBJECTS_X 100 +//#define NUM_OBJECTS_Y 100 +//#define NUM_OBJECTS_Z 100 + +///RECREATE_CL_AND_SHADERS_ON_RESIZE will delete and re-create OpenCL and GLSL shaders/buffers at each resize +//#define RECREATE_CL_AND_SHADERS_ON_RESIZE + +/// +/// OpenCL - OpenGL interop example. Updating transforms of many cubes on GPU, without going through main memory/using the PCIe bus +/// Create all OpenGL resources AFTER create OpenCL context! 
+/// + + +#include +#include + +#include "btGlutInclude.h" +#include "../opengl_interop/btStopwatch.h" + + +#include "LinearMath/btVector3.h" +#include "LinearMath/btQuaternion.h" +#include "LinearMath/btMatrix3x3.h" +static float sAngle(0); + +#include + +#ifdef _WIN32 +#include +#endif + +#include +#include +#include "../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h" +#include "../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h" +#include "btGridBroadphaseCl.h" + +#define USE_NEW +#ifdef USE_NEW +btGridBroadphaseCl* sBroadphase=0; +#else +btGpu3DGridBroadphase* sBroadphase=0; +#endif + +btAlignedObjectArray proxyArray; + + +#define RS_SCALE (1.0 / (1.0 + RAND_MAX)) + + +int randbiased (double x) { + for (;;) { + double p = rand () * RS_SCALE; + if (p >= x) return 0; + if (p+RS_SCALE <= x) return 1; + /* p < x < p+RS_SCALE */ + x = (x - p) * (1.0 + RAND_MAX); + } +} + +size_t randrange (size_t n) +{ + double xhi; + double resolution = n * RS_SCALE; + double x = resolution * rand (); /* x in [0,n) */ + size_t lo = (size_t) floor (x); + + xhi = x + resolution; + + for (;;) { + lo++; + if (lo >= xhi || randbiased ((lo - x) / (xhi - x))) return lo-1; + x = lo; + } +} + +//OpenCL stuff +#include "../basic_initialize/btOpenCLUtils.h" +#include "../opengl_interop/btOpenCLGLInteropBuffer.h" +#include "findPairsOpenCL.h" + +btFindPairsIO gFpIO; + +cl_context g_cxMainContext; +cl_command_queue g_cqCommandQue; +cl_device_id g_device; +static const size_t workGroupSize = 64; +cl_mem gLinVelMem; +cl_mem gAngVelMem; +cl_mem gBodyTimes; + +btVector3 m_cameraPosition(142,220,142); +btVector3 m_cameraTargetPosition(0,-30,0); +btScalar m_cameraDistance = 200; +btVector3 m_cameraUp(0,1,0); +float m_azi=-50.f; +float m_ele=0.f; + + + + +btOpenCLGLInteropBuffer* g_interopBuffer = 0; +cl_kernel g_sineWaveKernel; + + + +////for Adl +#include + +adl::DeviceCL* g_deviceCL=0; + + + +bool useCPU = false; +bool printStats = false; +bool runOpenCLKernels = true; + +#define MSTRINGIFY(A) #A 
+static char* interopKernelString = +#include "integrateKernel.cl" + + +btStopwatch gStopwatch; +int m_glutScreenWidth = 640; +int m_glutScreenHeight= 480; + +bool m_ortho = false; + +static GLuint instancingShader; // The instancing renderer +static GLuint cube_vao; +static GLuint cube_vbo; +static GLuint index_vbo; +static GLuint m_texturehandle; + +static bool done = false; +static GLint angle_loc = 0; +static GLint ModelViewMatrix; +static GLint ProjectionMatrix; + +void writeTransforms(); + +static GLint uniform_texture_diffuse = 0; + +//used for dynamic loading from disk (default switched off) +#define MAX_SHADER_LENGTH 8192 +static GLubyte shaderText[MAX_SHADER_LENGTH]; + +static const char* vertexShader= \ +"#version 330\n" +"precision highp float;\n" +"\n" +"\n" +"\n" +"layout (location = 0) in vec4 position;\n" +"layout (location = 1) in vec4 instance_position;\n" +"layout (location = 2) in vec4 instance_quaternion;\n" +"layout (location = 3) in vec2 uvcoords;\n" +"layout (location = 4) in vec3 vertexnormal;\n" +"layout (location = 5) in vec4 instance_color;\n" +"layout (location = 6) in vec3 instance_scale;\n" +"\n" +"\n" +"uniform float angle = 0.0;\n" +"uniform mat4 ModelViewMatrix;\n" +"uniform mat4 ProjectionMatrix;\n" +"\n" +"out Fragment\n" +"{\n" +" vec4 color;\n" +"} fragment;\n" +"\n" +"out Vert\n" +"{\n" +" vec2 texcoord;\n" +"} vert;\n" +"\n" +"\n" +"vec4 quatMul ( in vec4 q1, in vec4 q2 )\n" +"{\n" +" vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n" +" vec4 dt = q1 * q2;\n" +" float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n" +" return vec4 ( im, re );\n" +"}\n" +"\n" +"vec4 quatFromAxisAngle(vec4 axis, in float angle)\n" +"{\n" +" float cah = cos(angle*0.5);\n" +" float sah = sin(angle*0.5);\n" +" float d = inversesqrt(dot(axis,axis));\n" +" vec4 q = vec4(axis.x*sah*d,axis.y*sah*d,axis.z*sah*d,cah);\n" +" return q;\n" +"}\n" +"//\n" +"// vector rotation via quaternion\n" +"//\n" +"vec4 quatRotate3 ( in vec3 p, 
in vec4 q )\n" +"{\n" +" vec4 temp = quatMul ( q, vec4 ( p, 0.0 ) );\n" +" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" +"}\n" +"vec4 quatRotate ( in vec4 p, in vec4 q )\n" +"{\n" +" vec4 temp = quatMul ( q, p );\n" +" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" +"}\n" +"\n" +"out vec3 lightDir,normal,ambient;\n" +"\n" +"void main(void)\n" +"{\n" +" vec4 q = instance_quaternion;\n" +" ambient = vec3(0.2,0.2,0.2);\n" +" \n" +" \n" +" vec4 local_normal = (quatRotate3( vertexnormal,q));\n" +" vec3 light_pos = vec3(10000,10000,10000);\n" +" normal = normalize(ModelViewMatrix * local_normal).xyz;\n" +"\n" +" lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n" +"// lightDir = normalize(vec3(gl_LightSource[0].position));\n" +" \n" +" vec4 axis = vec4(1,1,1,0);\n" +" vec4 localcoord = quatRotate3( position.xyz*instance_scale,q);\n" +" vec4 vertexPos = ProjectionMatrix * ModelViewMatrix *(instance_position+localcoord);\n" +"\n" +" gl_Position = vertexPos;\n" +" \n" +" fragment.color = instance_color;\n" +" vert.texcoord = uvcoords;\n" +"}\n" +; + + +static const char* fragmentShader= \ +"#version 330\n" +"precision highp float;\n" +"\n" +"in Fragment\n" +"{\n" +" vec4 color;\n" +"} fragment;\n" +"\n" +"in Vert\n" +"{\n" +" vec2 texcoord;\n" +"} vert;\n" +"\n" +"uniform sampler2D Diffuse;\n" +"\n" +"in vec3 lightDir,normal,ambient;\n" +"\n" +"out vec4 color;\n" +"\n" +"void main_textured(void)\n" +"{\n" +" color = texture2D(Diffuse,vert.texcoord);//fragment.color;\n" +"}\n" +"\n" +"void main(void)\n" +"{\n" +" vec4 texel = fragment.color*texture2D(Diffuse,vert.texcoord);//fragment.color;\n" +" vec3 ct,cf;\n" +" float intensity,at,af;\n" +" intensity = max(dot(lightDir,normalize(normal)),0.5);\n" +" cf = intensity*vec3(1.0,1.0,1.0);//intensity * (gl_FrontMaterial.diffuse).rgb+ambient;//gl_FrontMaterial.ambient.rgb;\n" +" af = 1.0;\n" +" \n" +" ct = texel.rgb;\n" +" at = texel.a;\n" +" \n" +" color = vec4(ct * cf, at * af); \n" 
+"}\n" +; + + +// Load the shader from the source text +void gltLoadShaderSrc(const char *szShaderSrc, GLuint shader) +{ + GLchar *fsStringPtr[1]; + + fsStringPtr[0] = (GLchar *)szShaderSrc; + glShaderSource(shader, 1, (const GLchar **)fsStringPtr, NULL); +} + + +//////////////////////////////////////////////////////////////// +// Load the shader from the specified file. Returns false if the +// shader could not be loaded +bool gltLoadShaderFile(const char *szFile, GLuint shader) +{ + GLint shaderLength = 0; + FILE *fp; + + // Open the shader file + fp = fopen(szFile, "r"); + if(fp != NULL) + { + // See how long the file is + while (fgetc(fp) != EOF) + shaderLength++; + + // Allocate a block of memory to send in the shader + assert(shaderLength < MAX_SHADER_LENGTH); // make me bigger! + if(shaderLength > MAX_SHADER_LENGTH) + { + fclose(fp); + return false; + } + + // Go back to beginning of file + rewind(fp); + + // Read the whole file in + if (shaderText != NULL) + fread(shaderText, 1, shaderLength, fp); + + // Make sure it is null terminated and close the file + shaderText[shaderLength] = '\0'; + fclose(fp); + } + else + return false; + + // printf(shaderText); + // Load the string + gltLoadShaderSrc((const char *)shaderText, shader); + + return true; +} + + +///////////////////////////////////////////////////////////////// +// Load a pair of shaders, compile, and link together. Specify the complete +// file path for each shader. Note, there is no support for +// just loading say a vertex program... you have to do both. 
+GLuint gltLoadShaderPair(const char *szVertexProg, const char *szFragmentProg, bool loadFromFile) +{ + // Temporary Shader objects + GLuint hVertexShader; + GLuint hFragmentShader; + GLuint hReturn = 0; + GLint testVal; + + // Create shader objects + hVertexShader = glCreateShader(GL_VERTEX_SHADER); + hFragmentShader = glCreateShader(GL_FRAGMENT_SHADER); + + if (loadFromFile) + { + + if(gltLoadShaderFile(szVertexProg, hVertexShader) == false) + { + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + if(gltLoadShaderFile(szFragmentProg, hFragmentShader) == false) + { + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + } else + { + gltLoadShaderSrc(vertexShader, hVertexShader); + gltLoadShaderSrc(fragmentShader, hFragmentShader); + } + // Compile them + glCompileShader(hVertexShader); + glCompileShader(hFragmentShader); + + // Check for errors + glGetShaderiv(hVertexShader, GL_COMPILE_STATUS, &testVal); + if(testVal == GL_FALSE) + { + char temp[256] = ""; + glGetShaderInfoLog( hVertexShader, 256, NULL, temp); + fprintf( stderr, "Compile failed:\n%s\n", temp); + assert(0); + exit(0); + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + glGetShaderiv(hFragmentShader, GL_COMPILE_STATUS, &testVal); + if(testVal == GL_FALSE) + { + char temp[256] = ""; + glGetShaderInfoLog( hFragmentShader, 256, NULL, temp); + fprintf( stderr, "Compile failed:\n%s\n", temp); + assert(0); + exit(0); + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + // Link them - assuming it works... 
+ hReturn = glCreateProgram(); + glAttachShader(hReturn, hVertexShader); + glAttachShader(hReturn, hFragmentShader); + + glLinkProgram(hReturn); + + // These are no longer needed + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + + // Make sure link worked too + glGetProgramiv(hReturn, GL_LINK_STATUS, &testVal); + if(testVal == GL_FALSE) + { + glDeleteProgram(hReturn); + return (GLuint)NULL; + } + + return hReturn; +} + +///position xyz, unused w, normal, uv +static const GLfloat cube_vertices[] = +{ + -1.0f, -1.0f, 1.0f, 0.0f, 0,0,1, 0,0,//0 + 1.0f, -1.0f, 1.0f, 0.0f, 0,0,1, 1,0,//1 + 1.0f, 1.0f, 1.0f, 0.0f, 0,0,1, 1,1,//2 + -1.0f, 1.0f, 1.0f, 0.0f, 0,0,1, 0,1 ,//3 + + -1.0f, -1.0f, -1.0f, 1.0f, 0,0,-1, 0,0,//4 + 1.0f, -1.0f, -1.0f, 1.0f, 0,0,-1, 1,0,//5 + 1.0f, 1.0f, -1.0f, 1.0f, 0,0,-1, 1,1,//6 + -1.0f, 1.0f, -1.0f, 1.0f, 0,0,-1, 0,1,//7 + + -1.0f, -1.0f, -1.0f, 1.0f, -1,0,0, 0,0, + -1.0f, 1.0f, -1.0f, 1.0f, -1,0,0, 1,0, + -1.0f, 1.0f, 1.0f, 1.0f, -1,0,0, 1,1, + -1.0f, -1.0f, 1.0f, 1.0f, -1,0,0, 0,1, + + 1.0f, -1.0f, -1.0f, 1.0f, 1,0,0, 0,0, + 1.0f, 1.0f, -1.0f, 1.0f, 1,0,0, 1,0, + 1.0f, 1.0f, 1.0f, 1.0f, 1,0,0, 1,1, + 1.0f, -1.0f, 1.0f, 1.0f, 1,0,0, 0,1, + + -1.0f, -1.0f, -1.0f, 1.0f, 0,-1,0, 0,0, + -1.0f, -1.0f, 1.0f, 1.0f, 0,-1,0, 1,0, + 1.0f, -1.0f, 1.0f, 1.0f, 0,-1,0, 1,1, + 1.0f,-1.0f, -1.0f, 1.0f, 0,-1,0, 0,1, + + -1.0f, 1.0f, -1.0f, 1.0f, 0,1,0, 0,0, + -1.0f, 1.0f, 1.0f, 1.0f, 0,1,0, 1,0, + 1.0f, 1.0f, 1.0f, 1.0f, 0,1,0, 1,1, + 1.0f,1.0f, -1.0f, 1.0f, 0,1,0, 0,1, +}; + +static const int cube_indices[]= +{ + 0,1,2,0,2,3,//ground face + 4,5,6,4,6,7,//top face + 8,9,10,8,10,11, + 12,13,14,12,14,15, + 16,17,18,16,18,19, + 20,21,22,20,22,23 +}; + +int m_mouseOldX = -1; +int m_mouseOldY = -1; +int m_mouseButtons = 0; + + +void mouseFunc(int button, int state, int x, int y) +{ + if (state == 0) + { + m_mouseButtons |= 1<0) + { + g_device= btOpenCLUtils::getDevice(g_cxMainContext,0); + btOpenCLDeviceInfo clInfo; + 
btOpenCLUtils::getDeviceInfo(g_device,clInfo); + btOpenCLUtils::printDeviceInfo(g_device); + // create a command-queue + g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + //normally you would create and execute kernels using this command queue + + } + + +} + +#define NUM_OBJECTS (NUM_OBJECTS_X*NUM_OBJECTS_Y*NUM_OBJECTS_Z) +#define POSITION_BUFFER_SIZE (NUM_OBJECTS*sizeof(float)*4) +#define ORIENTATION_BUFFER_SIZE (NUM_OBJECTS*sizeof(float)*4) +#define COLOR_BUFFER_SIZE (NUM_OBJECTS*sizeof(float)*4) +#define SCALE_BUFFER_SIZE (NUM_OBJECTS*sizeof(float)*3) + + +GLfloat* instance_positions_ptr = 0; +GLfloat* instance_quaternion_ptr = 0; +GLfloat* instance_colors_ptr = 0; +GLfloat* instance_scale_ptr= 0; + + +void DeleteShaders() +{ + glDeleteVertexArrays(1, &cube_vao); + glDeleteBuffers(1,&index_vbo); + glDeleteBuffers(1,&cube_vbo); + glDeleteProgram(instancingShader); +} + + +void InitShaders() +{ + + btOverlappingPairCache* overlappingPairCache=0; +#ifdef USE_NEW + sBroadphase = new btGridBroadphaseCl(overlappingPairCache,btVector3(3.f, 3.f, 3.f), 32, 32, 32,NUM_OBJECTS, NUM_OBJECTS, 64, 100.f, 16, + g_cxMainContext ,g_device,g_cqCommandQue); +#else + sBroadphase = new btGpu3DGridBroadphase(btVector3(10.f, 10.f, 10.f), 32, 32, 32,NUM_OBJECTS, NUM_OBJECTS, 64, 100.f, 16); +#endif + + + +// sBroadphase = new bt3dGridBroadphaseOCL(overlappingPairCache,btVector3(10.f, 10.f, 10.f), 32, 32, 32,NUM_OBJECTS, NUM_OBJECTS, 64, 100.f, 16, +// g_cxMainContext ,g_device,g_cqCommandQue); + + + + bool loadFromFile = false; + instancingShader = gltLoadShaderPair("instancing.vs","instancing.fs", loadFromFile); + + glLinkProgram(instancingShader); + glUseProgram(instancingShader); + angle_loc = glGetUniformLocation(instancingShader, "angle"); + ModelViewMatrix = glGetUniformLocation(instancingShader, "ModelViewMatrix"); + ProjectionMatrix = glGetUniformLocation(instancingShader, "ProjectionMatrix"); + 
uniform_texture_diffuse = glGetUniformLocation(instancingShader, "Diffuse"); + + GLuint offset = 0; + + + glGenBuffers(1, &cube_vbo); + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + + instance_positions_ptr = (GLfloat*)new float[NUM_OBJECTS*4]; + instance_quaternion_ptr = (GLfloat*)new float[NUM_OBJECTS*4]; + instance_colors_ptr = (GLfloat*)new float[NUM_OBJECTS*4]; + instance_scale_ptr = (GLfloat*)new float[NUM_OBJECTS*3]; + + int index=0; + for (int i=0;icreateProxy(aabbMin,aabbMax,shapeType,myptr,1,1,0,0);//m_dispatcher); + proxyArray.push_back(proxy); + + instance_quaternion_ptr[index*4]=0; + instance_quaternion_ptr[index*4+1]=0; + instance_quaternion_ptr[index*4+2]=0; + instance_quaternion_ptr[index*4+3]=1; + + instance_colors_ptr[index*4]=j m_glutScreenHeight) + { + aspect = m_glutScreenWidth / (float)m_glutScreenHeight; + extents.setValue(aspect * 1.0f, 1.0f,0); + } else + { + aspect = m_glutScreenHeight / (float)m_glutScreenWidth; + extents.setValue(1.0f, aspect*1.f,0); + } + + + if (m_ortho) + { + // reset matrix + glLoadIdentity(); + extents *= m_cameraDistance; + btVector3 lower = m_cameraTargetPosition - extents; + btVector3 upper = m_cameraTargetPosition + extents; + glOrtho(lower.getX(), upper.getX(), lower.getY(), upper.getY(),-1000,1000); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + } else + { + if (m_glutScreenWidth > m_glutScreenHeight) + { + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + } else + { + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + } + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + gluLookAt(m_cameraPosition[0], m_cameraPosition[1], m_cameraPosition[2], + m_cameraTargetPosition[0], m_cameraTargetPosition[1], m_cameraTargetPosition[2], + m_cameraUp.getX(),m_cameraUp.getY(),m_cameraUp.getZ()); + } + +} + + + +void myinit() +{ + + + + // GLfloat light_ambient[] = { 
btScalar(0.2), btScalar(0.2), btScalar(0.2), btScalar(1.0) }; + GLfloat light_ambient[] = { btScalar(1.0), btScalar(1.2), btScalar(0.2), btScalar(1.0) }; + + GLfloat light_diffuse[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0) }; + GLfloat light_specular[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0 )}; + /* light_position is NOT default value */ + GLfloat light_position0[] = { btScalar(10000.0), btScalar(10000.0), btScalar(10000.0), btScalar(0.0 )}; + GLfloat light_position1[] = { btScalar(-1.0), btScalar(-10.0), btScalar(-1.0), btScalar(0.0) }; + + glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT0, GL_POSITION, light_position0); + + glLightfv(GL_LIGHT1, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT1, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT1, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT1, GL_POSITION, light_position1); + + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + glEnable(GL_LIGHT1); + + + // glShadeModel(GL_FLAT);//GL_SMOOTH); + glShadeModel(GL_SMOOTH); + + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + + glClearColor(float(0.7),float(0.7),float(0.7),float(0)); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + + + static bool m_textureenabled = true; + static bool m_textureinitialized = false; + + + if(m_textureenabled) + { + if(!m_textureinitialized) + { + glActiveTexture(GL_TEXTURE0); + + GLubyte* image=new GLubyte[256*256*3]; + for(int y=0;y<256;++y) + { + const int t=y>>5; + GLubyte* pi=image+y*256*3; + for(int x=0;x<256;++x) + { + const int s=x>>5; + const GLubyte b=180; + GLubyte c=b+((s+t&1)&1)*(255-b); + pi[0]=c; + pi[1]=c; + pi[2]=c; + pi+=3; + } + } + + glGenTextures(1,(GLuint*)&m_texturehandle); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + glTexEnvf(GL_TEXTURE_ENV,GL_TEXTURE_ENV_MODE,GL_MODULATE); + 
glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT); + gluBuild2DMipmaps(GL_TEXTURE_2D,3,256,256,GL_RGB,GL_UNSIGNED_BYTE,image); + delete[] image; + m_textureinitialized=true; + } + // glMatrixMode(GL_TEXTURE); + // glLoadIdentity(); + // glMatrixMode(GL_MODELVIEW); + + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + + } else + { + glDisable(GL_TEXTURE_2D); + } + + glEnable(GL_COLOR_MATERIAL); + + + // glEnable(GL_CULL_FACE); + // glCullFace(GL_BACK); +} + +//#pragma optimize( "g", off ) + + + +void writeTransforms() +{ + + + glFlush(); + char* bla = (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY + + float* positions = (float*)(bla+sizeof(cube_vertices)); + float* orientations = (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE); + float* colors= (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE); + float* scaling= (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE); + + // positions[0]+=0.001f; + + static int offset=0; + //offset++; + + static btVector3 axis(1,0,0); + sAngle += 0.01f; + int index=0; + btQuaternion orn(axis,sAngle); + for (int i=0;igetCLBUffer(); + cl_int ciErrNum = CL_SUCCESS; + ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + if (runOpenCLKernels) + { + int numObjects = NUM_OBJECTS; + int offset = (sizeof(cube_vertices) )/4; + + ciErrNum = clSetKernelArg(g_sineWaveKernel, 0, sizeof(int), &offset); + ciErrNum = clSetKernelArg(g_sineWaveKernel, 1, sizeof(int), &numObjects); + ciErrNum = clSetKernelArg(g_sineWaveKernel, 2, sizeof(cl_mem), (void*)&clBuffer ); + + ciErrNum = clSetKernelArg(g_sineWaveKernel, 3, 
sizeof(cl_mem), (void*)&gLinVelMem); + ciErrNum = clSetKernelArg(g_sineWaveKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem); + ciErrNum = clSetKernelArg(g_sineWaveKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes); + + + + + + size_t numWorkItems = workGroupSize*((NUM_OBJECTS + (workGroupSize)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_sineWaveKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + + ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + clFinish(g_cqCommandQue); + + } + +} + + +void cpuBroadphase() +{ + glFlush(); + char* bla = (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY + + float* positions = (float*)(bla+sizeof(cube_vertices)); + float* orientations = (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE); + float* colors= (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE); + float* scaling= (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE); + + int index=0; + + for (int i=0;isetAabb(proxyArray[index],aabbMin,aabbMax,0); + + index++; + } + } + } + +#ifdef USE_NEW + + +#else + sBroadphase->calculateOverlappingPairs(0); + int overlap = sBroadphase->getOverlappingPairCache()->getNumOverlappingPairs(); + for (int i=0;igetOverlappingPairCache()->getOverlappingPairArray()[i]; + int indexA = (int)pair.m_pProxy0->m_clientObject; + int indexB = (int)pair.m_pProxy1->m_clientObject; + colors[indexA*4] = 1.f; + colors[indexA*4+1] = 0.f; + colors[indexA*4+2] = 0.f; + colors[indexA*4+3] = 1.f; + + colors[indexB*4] = 1.f; + colors[indexB*4+1] = 0.f; + colors[indexB*4+2] = 0.f; + colors[indexB*4+3] = 1.f; + } +#endif + + //now color the overlap + + + + glUnmapBuffer( GL_ARRAY_BUFFER); + //if this glFinish is removed, the animation is not always working/blocks + //@todo: figure out why + glFlush(); +} + +void broadphase() 
+{ + if (useCPU) + { + cpuBroadphase(); + + } + else + { + + glFinish(); + + cl_mem clBuffer = g_interopBuffer->getCLBUffer(); + cl_int ciErrNum = CL_SUCCESS; + ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + if (runOpenCLKernels) + { + + gFpIO.m_numObjects = NUM_OBJECTS; + gFpIO.m_positionOffset = (sizeof(cube_vertices) )/4; + gFpIO.m_clObjectsBuffer = clBuffer; + gFpIO.m_dAABB = sBroadphase->m_dAABB; + setupGpuAabbsSimple(gFpIO); + + sBroadphase->calculateOverlappingPairs(0, NUM_OBJECTS); + + + gFpIO.m_dAllOverlappingPairs = sBroadphase->m_dAllOverlappingPairs; + gFpIO.m_numOverlap = sBroadphase->m_numPrefixSum; + + colorPairsOpenCL(gFpIO); + + } + + ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + clFinish(g_cqCommandQue); + + + + } +} + + +//#pragma optimize( "g", on ) + +void RenderScene(void) +{ + +#if 0 + float modelview[20]={0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9}; + // get the current modelview matrix + glGetFloatv(GL_MODELVIEW_MATRIX , modelview); + float projection[20]={0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9}; + glGetFloatv(GL_PROJECTION_MATRIX, projection); +#endif + + myinit(); + + updateCamera(); + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + //render coordinate system + glBegin(GL_LINES); + glColor3f(1,0,0); + glVertex3f(0,0,0); + glVertex3f(1,0,0); + glColor3f(0,1,0); + glVertex3f(0,0,0); + glVertex3f(0,1,0); + glColor3f(0,0,1); + glVertex3f(0,0,0); + glVertex3f(0,0,1); + glEnd(); + + //do a finish, to make sure timings are clean + // glFinish(); + + float start = gStopwatch.getTimeMilliseconds(); + + // glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + glFlush(); + + updatePos(); + + broadphase(); + + //useCPU = true; + + float stop = gStopwatch.getTimeMilliseconds(); + gStopwatch.reset(); + + if (printStats) + { + printf("updatePos=%f ms on ",stop-start); + + if 
(useCPU) + { + printf("CPU \n"); + } else + { + printf("OpenCL "); + if (runOpenCLKernels) + printf("running the kernels"); + else + printf("without running the kernels"); + printf("\n"); + } + } + + glBindVertexArray(cube_vao); + + glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 9*sizeof(float), 0); + glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices))); + glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices)+POSITION_BUFFER_SIZE)); + int uvoffset = 7*sizeof(float); + int normaloffset = 4*sizeof(float); + + glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)uvoffset); + glVertexAttribPointer(4, 3, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)normaloffset); + glVertexAttribPointer(5, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices)+POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE)); + glVertexAttribPointer(6, 3, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices)+POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE)); + + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glEnableVertexAttribArray(4); + glEnableVertexAttribArray(5); + glEnableVertexAttribArray(6); + + glVertexAttribDivisor(0, 0); + glVertexAttribDivisor(1, 1); + glVertexAttribDivisor(2, 1); + glVertexAttribDivisor(3, 0); + glVertexAttribDivisor(4, 0); + glVertexAttribDivisor(5, 1); + glVertexAttribDivisor(6, 1); + + glUseProgram(instancingShader); + glUniform1f(angle_loc, 0); + GLfloat pm[16]; + glGetFloatv(GL_PROJECTION_MATRIX, pm); + glUniformMatrix4fv(ProjectionMatrix, 1, false, &pm[0]); + + GLfloat mvm[16]; + glGetFloatv(GL_MODELVIEW_MATRIX, mvm); + glUniformMatrix4fv(ModelViewMatrix, 1, false, &mvm[0]); + + glUniform1i(uniform_texture_diffuse, 0); + + glFlush(); + int numInstances = NUM_OBJECTS; + int indexCount = sizeof(cube_indices)/sizeof(int); + int indexOffset = 0; + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 
index_vbo); + glDrawElementsInstanced(GL_TRIANGLES, indexCount, GL_UNSIGNED_INT, (void*)indexOffset, numInstances); + + glUseProgram(0); + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindVertexArray(0); + + glutSwapBuffers(); + glutPostRedisplay(); + + GLint err = glGetError(); + assert(err==GL_NO_ERROR); +} + + +void ChangeSize(int w, int h) +{ + m_glutScreenWidth = w; + m_glutScreenHeight = h; + +#ifdef RECREATE_CL_AND_SHADERS_ON_RESIZE + delete g_interopBuffer; + clReleaseKernel(g_sineWaveKernel); + releaseFindPairs(fpio); + DeleteCL(); + DeleteShaders(); +#endif //RECREATE_CL_AND_SHADERS_ON_RESIZE + + // Set Viewport to window dimensions + glViewport(0, 0, w, h); + +#ifdef RECREATE_CL_AND_SHADERS_ON_RESIZE + InitCL(); + InitShaders(); + + g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo); + clFinish(g_cqCommandQue); + g_sineWaveKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, interopKernelString, "interopKernel" ); + initFindPairs(...); +#endif //RECREATE_CL_AND_SHADERS_ON_RESIZE + +} + +void Keyboard(unsigned char key, int x, int y) +{ + switch (key) + { + case 27: + done = true; + break; + case 'O': + case 'o': + { + m_ortho = !m_ortho; + break; + } + case 'c': + case 'C': + { + useCPU = !useCPU; + if (useCPU) + printf("using CPU\n"); + else + printf("using OpenCL\n"); + break; + } + case 's': + case 'S': + { + printStats = !printStats; + break; + } + case 'k': + case 'K': + { + runOpenCLKernels=!runOpenCLKernels; + break; + } + case 'q': + case 'Q': + exit(0); + default: + break; + } +} + +// Cleanup +void ShutdownRC(void) +{ + glDeleteBuffers(1, &cube_vbo); + glDeleteVertexArrays(1, &cube_vao); +} + +int main(int argc, char* argv[]) +{ + srand(0); + // printf("vertexShader = \n%s\n",vertexShader); + // printf("fragmentShader = \n%s\n",fragmentShader); + + glutInit(&argc, argv); + + glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); + + + glutInitWindowSize(m_glutScreenWidth, m_glutScreenHeight); + char buf[1024]; + 
sprintf(buf,"OpenCL broadphase benchmark, %d cubes on the GPU", NUM_OBJECTS); + glutCreateWindow(buf); + + glutReshapeFunc(ChangeSize); + + glutMouseFunc(mouseFunc); + glutMotionFunc(mouseMotionFunc); + + glutKeyboardFunc(Keyboard); + glutDisplayFunc(RenderScene); + + GLenum err = glewInit(); + if (GLEW_OK != err) + { + /* Problem: glewInit failed, something is seriously wrong. */ + fprintf(stderr, "Error: %s\n", glewGetErrorString(err)); + } + + //ChangeSize(m_glutScreenWidth,m_glutScreenHeight); + + InitCL(); + + +#define CUSTOM_CL_INITIALIZATION +#ifdef CUSTOM_CL_INITIALIZATION + g_deviceCL = new adl::DeviceCL(); + g_deviceCL->m_deviceIdx = g_device; + g_deviceCL->m_context = g_cxMainContext; + g_deviceCL->m_commandQueue = g_cqCommandQue; + +#else + DeviceUtils::Config cfg; + cfg.m_type = DeviceUtils::Config::DEVICE_CPU; + g_deviceCL = DeviceUtils::allocate( TYPE_CL, cfg ); +#endif + + int size = NUM_OBJECTS; + adl::Buffer linvelBuf( g_deviceCL, size ); + adl::Buffer angvelBuf( g_deviceCL, size ); + adl::Buffer bodyTimes(g_deviceCL,size); + + gLinVelMem = (cl_mem)linvelBuf.m_ptr; + gAngVelMem = (cl_mem)angvelBuf.m_ptr; + gBodyTimes = (cl_mem)bodyTimes.m_ptr; + + btVector3* linVelHost= new btVector3[size]; + btVector3* angVelHost = new btVector3[size]; + float* bodyTimesHost = new float[size]; + + for (int i=0;i + +cl_context g_cxMainContext; +cl_command_queue g_cqCommandQue; +cl_kernel g_atomicsKernel; +static const size_t workGroupSize = 128;//todo figure out an appropriate workgroup size suitable for the OpenCL platform/context/device/kernel +#define NUM_OBJECTS 1024 + +#include "globalAtomicsKernel.h" + + +char * findAndReplace( char const * const original, char const * const pattern, char const * const replacement); + + +#include +#include + + +int main(int argc, char* argv[]) +{ + int ciErrNum = 0; + + printf("press a key to start\n"); + getchar(); + + const char* vendorSDK = btOpenCLUtils::getSdkVendorName(); + printf("This program was compiled using the 
%s OpenCL SDK\n",vendorSDK); + + cl_device_type deviceType = CL_DEVICE_TYPE_GPU;//CL_DEVICE_TYPE_ALL + + void* glCtx=0; + void* glDC = 0; + printf("Initialize OpenCL using btOpenCLUtils::createContextFromType for CL_DEVICE_TYPE_GPU\n"); + g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext); + + if (numDev>0) + { + int deviceIndex=0; + + cl_device_id device; + device = btOpenCLUtils::getDevice(g_cxMainContext,deviceIndex); + btOpenCLDeviceInfo clInfo; + btOpenCLUtils::getDeviceInfo(device,clInfo); + btOpenCLUtils::printDeviceInfo(device); + + + const char* globalAtomicsKernelStringPatched = globalAtomicsKernelString; + if (!strstr(clInfo.m_deviceExtensions,"cl_ext_atomic_counters_32")) + { + globalAtomicsKernelStringPatched = findAndReplace(globalAtomicsKernelString,"counter32_t", "volatile __global int*"); + } + + + + // create a command-queue + g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, device, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + cl_mem counterBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, sizeof(int), NULL, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + char* kernelMethods[] = + { + "globalAtomicKernelOpenCL1_1", + "counterAtomicKernelExt", + "globalAtomicKernelExt", + "globalAtomicKernelCounters32Broken" + }; + int numKernelMethods = sizeof(kernelMethods)/sizeof(char*); + + for (int i=0;i +#include + +char * findAndReplace( + char const * const original, + char const * const pattern, + char const * const replacement +) { + size_t const replen = strlen(replacement); + size_t const patlen = strlen(pattern); + size_t const orilen = strlen(original); + + size_t patcnt = 0; + const char * oriptr; + const char * patloc; + + // find how many times the pattern occurs in the original string + for (oriptr = original; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen) + 
{ + patcnt++; + } + + { + // allocate memory for the new string + size_t const retlen = orilen + patcnt * (replen - patlen); + char * const returned = (char *) malloc( sizeof(char) * (retlen + 1) ); + + if (returned != NULL) + { + // copy the original string, + // replacing all the instances of the pattern + char * retptr = returned; + for (oriptr = original; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen) + { + size_t const skplen = patloc - oriptr; + // copy the section until the occurence of the pattern + strncpy(retptr, oriptr, skplen); + retptr += skplen; + // copy the replacement + strncpy(retptr, replacement, replen); + retptr += replen; + } + // copy the rest of the string. + strcpy(retptr, oriptr); + } + return returned; + } +} + +#ifdef _WIN32 +#pragma warning( pop ) +#endif //_WIN32 diff --git a/Extras/RigidBodyGpuPipeline/opencl/global_atomics/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/global_atomics/premake4.lua new file mode 100644 index 000000000..3a926c990 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/global_atomics/premake4.lua @@ -0,0 +1,4 @@ + + include "AMD" + --include "Intel" + --include "NVIDIA" diff --git a/Extras/RigidBodyGpuPipeline/opencl/global_atomics/stringify.py b/Extras/RigidBodyGpuPipeline/opencl/global_atomics/stringify.py new file mode 100644 index 000000000..e79e281e4 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/global_atomics/stringify.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +import sys +import os +import shutil + +arg = sys.argv[1] +fh = open(arg) + +print 'static const char* '+sys.argv[2]+'= \\' +for line in fh.readlines(): + a = line.strip('\n') + print '"'+a+'\\n"' +print ';' diff --git a/Extras/RigidBodyGpuPipeline/opencl/global_atomics/stringifykernels.bat b/Extras/RigidBodyGpuPipeline/opencl/global_atomics/stringifykernels.bat new file mode 100644 index 000000000..1415f8e5b --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/global_atomics/stringifykernels.bat @@ -0,0 +1,5 @@ 
+stringify.py global_atomics.cl globalAtomicsKernelString >globalAtomicsKernel.h + + + + diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/AMD/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/AMD/premake4.lua new file mode 100644 index 000000000..2000f3ea2 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/AMD/premake4.lua @@ -0,0 +1,58 @@ + + hasCL = findOpenCL_AMD() + + if (hasCL) then + + project "OpenCL_gpu_rigidbody_pipeline_AMD" + + initOpenCL_AMD() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + + initOpenGL() + initGlut() + initGlew() + + + includedirs { + "../../../rendering/BulletMath", + "../../primitives", + "../../../../../src" + } + + files { + "../main.cpp", + "../btConvexUtility.cpp", + "../btConvexUtility.h", + "../btGpuNarrowPhaseAndSolver.cpp", + "../btGpuNarrowPhaseAndSolver.h", + "../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp", + "../../../dynamics/basic_demo/ConvexHeightFieldShape.h", + "../../../../../src/LinearMath/btConvexHullComputer.cpp", + "../../../../../src/LinearMath/btConvexHullComputer.h", + "../../broadphase_benchmark/findPairsOpenCL.cpp", + "../../broadphase_benchmark/findPairsOpenCL.h", + "../../broadphase_benchmark/btGridBroadphaseCL.cpp", + "../../broadphase_benchmark/btGridBroadphaseCL.h", + "../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp", + "../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h", + "../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp", + "../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h", + "../../../../../src/LinearMath/btAlignedAllocator.cpp", + "../../../../../src/LinearMath/btQuickprof.cpp", + "../../../../../src/LinearMath/btQuickprof.h", + "../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp", + "../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp", + 
"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp", + "../../basic_initialize/btOpenCLUtils.cpp", + "../../basic_initialize/btOpenCLUtils.h", + "../../opengl_interop/btOpenCLGLInteropBuffer.cpp", + "../../opengl_interop/btOpenCLGLInteropBuffer.h", + "../../opengl_interop/btStopwatch.cpp", + "../../opengl_interop/btStopwatch.h" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/CommandLineArgs.h b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/CommandLineArgs.h new file mode 100644 index 000000000..b64610303 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/CommandLineArgs.h @@ -0,0 +1,91 @@ +#ifndef COMMAND_LINE_ARGS_H +#define COMMAND_LINE_ARGS_H + +/****************************************************************************** + * Command-line parsing + ******************************************************************************/ +#include +#include +#include +#include +class CommandLineArgs +{ +protected: + + std::map pairs; + +public: + + // Constructor + CommandLineArgs(int argc, char **argv) + { + using namespace std; + + for (int i = 1; i < argc; i++) + { + string arg = argv[i]; + + if ((arg[0] != '-') || (arg[1] != '-')) { + continue; + } + + string::size_type pos; + string key, val; + if ((pos = arg.find( '=')) == string::npos) { + key = string(arg, 2, arg.length() - 2); + val = ""; + } else { + key = string(arg, 2, pos - 2); + val = string(arg, pos + 1, arg.length() - 1); + } + pairs[key] = val; + } + } + + bool CheckCmdLineFlag(const char* arg_name) + { + using namespace std; + map::iterator itr; + if ((itr = pairs.find(arg_name)) != pairs.end()) { + return true; + } + return false; + } + + template + void GetCmdLineArgument(const char *arg_name, T &val); + + int ParsedArgc() + { + return pairs.size(); + } +}; + +template +void CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val) +{ + using namespace std; + 
map::iterator itr; + if ((itr = pairs.find(arg_name)) != pairs.end()) { + istringstream strstream(itr->second); + strstream >> val; + } +} + +template <> +void CommandLineArgs::GetCmdLineArgument(const char* arg_name, char* &val) +{ + using namespace std; + map::iterator itr; + if ((itr = pairs.find(arg_name)) != pairs.end()) { + + string s = itr->second; + val = (char*) malloc(sizeof(char) * (s.length() + 1)); + strcpy(val, s.c_str()); + + } else { + val = NULL; + } +} + +#endif //COMMAND_LINE_ARGS_H diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/Intel/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/Intel/premake4.lua new file mode 100644 index 000000000..541dc7941 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/Intel/premake4.lua @@ -0,0 +1,58 @@ + + hasCL = findOpenCL_Intel() + + if (hasCL) then + + project "OpenCL_gpu_rigidbody_pipeline_Intel" + + initOpenCL_Intel() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + + initOpenGL() + initGlut() + initGlew() + + + includedirs { + "../../../rendering/BulletMath", + "../../primitives", + "../../../../../src" + } + + files { + "../main.cpp", + "../btConvexUtility.cpp", + "../btConvexUtility.h", + "../btGpuNarrowPhaseAndSolver.cpp", + "../btGpuNarrowPhaseAndSolver.h", + "../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp", + "../../../dynamics/basic_demo/ConvexHeightFieldShape.h", + "../../../../../src/LinearMath/btConvexHullComputer.cpp", + "../../../../../src/LinearMath/btConvexHullComputer.h", + "../../broadphase_benchmark/findPairsOpenCL.cpp", + "../../broadphase_benchmark/findPairsOpenCL.h", + "../../broadphase_benchmark/btGridBroadphaseCL.cpp", + "../../broadphase_benchmark/btGridBroadphaseCL.h", + "../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp", + "../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h", + "../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp", + 
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h", + "../../../../../src/LinearMath/btAlignedAllocator.cpp", + "../../../../../src/LinearMath/btQuickprof.cpp", + "../../../../../src/LinearMath/btQuickprof.h", + "../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp", + "../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp", + "../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp", + "../../basic_initialize/btOpenCLUtils.cpp", + "../../basic_initialize/btOpenCLUtils.h", + "../../opengl_interop/btOpenCLGLInteropBuffer.cpp", + "../../opengl_interop/btOpenCLGLInteropBuffer.h", + "../../opengl_interop/btStopwatch.cpp", + "../../opengl_interop/btStopwatch.h" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/NVIDIA/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/NVIDIA/premake4.lua new file mode 100644 index 000000000..095e08f39 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/NVIDIA/premake4.lua @@ -0,0 +1,57 @@ + + hasCL = findOpenCL_NVIDIA() + + if (hasCL) then + + project "OpenCL_gpu_rigidbody_pipeline_NVIDIA" + + initOpenCL_NVIDIA() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + + initOpenGL() + initGlut() + initGlew() + + includedirs { + "../../../rendering/BulletMath", + "../../primitives", + "../../../../../src" + } + + files { + "../main.cpp", + "../btConvexUtility.cpp", + "../btConvexUtility.h", + "../btGpuNarrowPhaseAndSolver.cpp", + "../btGpuNarrowPhaseAndSolver.h", + "../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp", + "../../../dynamics/basic_demo/ConvexHeightFieldShape.h", + "../../../../../src/LinearMath/btConvexHullComputer.cpp", + "../../../../../src/LinearMath/btConvexHullComputer.h", + "../../broadphase_benchmark/findPairsOpenCL.cpp", + "../../broadphase_benchmark/findPairsOpenCL.h", + 
"../../broadphase_benchmark/btGridBroadphaseCL.cpp", + "../../broadphase_benchmark/btGridBroadphaseCL.h", + "../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp", + "../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h", + "../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp", + "../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h", + "../../../../../src/LinearMath/btAlignedAllocator.cpp", + "../../../../../src/LinearMath/btQuickprof.cpp", + "../../../../../src/LinearMath/btQuickprof.h", + "../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp", + "../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp", + "../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp", + "../../basic_initialize/btOpenCLUtils.cpp", + "../../basic_initialize/btOpenCLUtils.h", + "../../opengl_interop/btOpenCLGLInteropBuffer.cpp", + "../../opengl_interop/btOpenCLGLInteropBuffer.h", + "../../opengl_interop/btStopwatch.cpp", + "../../opengl_interop/btStopwatch.h" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/btConvexUtility.cpp b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/btConvexUtility.cpp new file mode 100644 index 000000000..7f24449f7 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/btConvexUtility.cpp @@ -0,0 +1,240 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + + +#include "btConvexUtility.h" +#include "LinearMath/btConvexHullComputer.h" +#include "LinearMath/btGrahamScan2dConvexHull.h" +#include "LinearMath/btQuaternion.h" + +bool btConvexUtility::initializePolyhedralFeatures(const btAlignedObjectArray& orgVertices, bool mergeCoplanarTriangles) +{ + + + btConvexHullComputer conv; + conv.compute(&orgVertices[0].getX(), sizeof(btVector3),orgVertices.size(),0.f,0.f); + + btAlignedObjectArray faceNormals; + int numFaces = conv.faces.size(); + faceNormals.resize(numFaces); + btConvexHullComputer* convexUtil = &conv; + + + btAlignedObjectArray tmpFaces; + tmpFaces.resize(numFaces); + + int numVertices = convexUtil->vertices.size(); + m_vertices.resize(numVertices); + for (int p=0;pvertices[p]; + } + + + for (int i=0;ifaces[i]; + //printf("face=%d\n",face); + const btConvexHullComputer::Edge* firstEdge = &convexUtil->edges[face]; + const btConvexHullComputer::Edge* edge = firstEdge; + + btVector3 edges[3]; + int numEdges = 0; + //compute face normals + + btScalar maxCross2 = 0.f; + int chosenEdge = -1; + + do + { + + int src = edge->getSourceVertex(); + tmpFaces[i].m_indices.push_back(src); + int targ = edge->getTargetVertex(); + btVector3 wa = convexUtil->vertices[src]; + + btVector3 wb = convexUtil->vertices[targ]; + btVector3 newEdge = wb-wa; + newEdge.normalize(); + if (numEdges<2) + edges[numEdges++] = newEdge; + + edge = edge->getNextEdgeOfFace(); + } while (edge!=firstEdge); + + btScalar planeEq = 1e30f; + + + if (numEdges==2) + { + faceNormals[i] = edges[0].cross(edges[1]); + faceNormals[i].normalize(); + tmpFaces[i].m_plane[0] = faceNormals[i].getX(); + 
tmpFaces[i].m_plane[1] = faceNormals[i].getY(); + tmpFaces[i].m_plane[2] = faceNormals[i].getZ(); + tmpFaces[i].m_plane[3] = planeEq; + + } + else + { + btAssert(0);//degenerate? + faceNormals[i].setZero(); + } + + for (int v=0;veq) + { + planeEq=eq; + } + } + tmpFaces[i].m_plane[3] = -planeEq; + } + + //merge coplanar faces + + btScalar faceWeldThreshold= 0.999f; + btAlignedObjectArray todoFaces; + for (int i=0;i coplanarFaceGroup; + int refFace = todoFaces[todoFaces.size()-1]; + + coplanarFaceGroup.push_back(refFace); + btFace& faceA = tmpFaces[refFace]; + todoFaces.pop_back(); + + btVector3 faceNormalA(faceA.m_plane[0],faceA.m_plane[1],faceA.m_plane[2]); + for (int j=todoFaces.size()-1;j>=0;j--) + { + int i = todoFaces[j]; + btFace& faceB = tmpFaces[i]; + btVector3 faceNormalB(faceB.m_plane[0],faceB.m_plane[1],faceB.m_plane[2]); + if (faceNormalA.dot(faceNormalB)>faceWeldThreshold) + { + coplanarFaceGroup.push_back(i); + todoFaces.remove(i); + } + } + + + bool did_merge = false; + if (mergeCoplanarTriangles && coplanarFaceGroup.size()>1) + { + //do the merge: use Graham Scan 2d convex hull + + btAlignedObjectArray orgpoints; + + for (int i=0;i hull; + GrahamScanConvexHull2D(orgpoints,hull); + + for (int i=0;i m_indices; +// btAlignedObjectArray m_connectedFaces; + btScalar m_plane[4]; +}; + +class btConvexUtility +{ + public: + + btAlignedObjectArray m_vertices; + btAlignedObjectArray m_faces; + + bool initializePolyhedralFeatures(const btAlignedObjectArray& orgVertices, bool mergeCoplanarTriangles); + +}; +#endif + \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.cpp b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.cpp new file mode 100644 index 000000000..6f377afd5 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.cpp @@ -0,0 +1,730 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. 
+ +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#include "btGpuNarrowphaseAndSolver.h" + +//#include "CustomConvexShape.h" +//#include "CustomConvexPairCollision.h" +#include "LinearMath/btQuickprof.h" + + +//#include "BulletDynamics/Dynamics/btRigidBody.h" + +#include "Adl/Adl.h" +#include "../../dynamics/basic_demo/Stubs/AdlMath.h" +#include "../../dynamics/basic_demo/Stubs/AdlContact4.h" +#include "../../dynamics/basic_demo/Stubs/AdlQuaternion.h" +#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h" +#include "../../dynamics/basic_demo/Stubs/Solver.h" +#include + +int gpuBatchContacts = 1; + +int numPairsOut =0; +struct CPUSolveData +{ + u32 m_n[adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT]; + u32 m_offset[adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT]; +}; + + +struct ParallelSolveData +{ + adl::Buffer* m_numConstraints; + adl::Buffer* m_offsets; +}; + +struct CustomDispatchData +{ + adl::DeviceCL* m_deviceCL; + adl::Device* m_deviceHost; + ShapeDataType m_ShapeBuffer; + adl::HostBuffer* m_shapePointers; + + adl::HostBuffer* m_pBufPairsCPU; + + adl::Buffer* m_convexPairsOutGPU; + adl::Buffer* m_planePairs; + + adl::Buffer* m_pBufContactOutGPU; + 
adl::HostBuffer* m_pBufContactOutCPU; + adl::ChNarrowphase::Data* m_Data; + + + + adl::HostBuffer* m_bodyBufferCPU; + adl::Buffer* m_bodyBufferGPU; + + adl::Buffer* m_inertiaBufferCPU; + adl::Buffer* m_inertiaBufferGPU; + + adl::Solver::Data* m_solverDataGPU; + SolverData m_contactCGPU; + void* m_frictionCGPU; + + int m_numAcceleratedShapes; + int m_numAcceleratedRigidBodies; +}; + + +btGpuNarrowphaseAndSolver::btGpuNarrowphaseAndSolver(adl::DeviceCL* deviceCL) + :m_internalData(0) ,m_planeBodyIndex(-1) +{ + + if (deviceCL) + { + m_internalData = new CustomDispatchData(); + memset(m_internalData,0,sizeof(CustomDispatchData)); + + adl::DeviceUtils::Config cfg; + m_internalData->m_deviceCL = deviceCL; + + + m_internalData->m_deviceHost = adl::DeviceUtils::allocate( adl::TYPE_HOST, cfg ); + m_internalData->m_pBufPairsCPU = new adl::HostBuffer(m_internalData->m_deviceHost, MAX_BROADPHASE_COLLISION_CL); + + m_internalData->m_convexPairsOutGPU = new adl::Buffer(m_internalData->m_deviceCL,MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_planePairs = new adl::Buffer(m_internalData->m_deviceCL,MAX_BROADPHASE_COLLISION_CL); + + m_internalData->m_pBufContactOutCPU = new adl::HostBuffer(m_internalData->m_deviceHost, MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_bodyBufferCPU = new adl::HostBuffer(m_internalData->m_deviceHost, MAX_CONVEX_BODIES_CL); + + m_internalData->m_inertiaBufferCPU = new adl::Buffer(m_internalData->m_deviceHost,MAX_CONVEX_BODIES_CL); + m_internalData->m_pBufContactOutGPU = new adl::Buffer(m_internalData->m_deviceCL, MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_inertiaBufferGPU = new adl::Buffer(m_internalData->m_deviceCL,MAX_CONVEX_BODIES_CL); + + m_internalData->m_solverDataGPU = adl::Solver::allocate( m_internalData->m_deviceCL, MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_bodyBufferGPU = new adl::Buffer(m_internalData->m_deviceCL, MAX_CONVEX_BODIES_CL); + m_internalData->m_Data = adl::ChNarrowphase::allocate(m_internalData->m_deviceCL); 
+// m_internalData->m_DataCPU = adl::ChNarrowphase::allocate(m_internalData->m_deviceHost); + + + m_internalData->m_ShapeBuffer = adl::ChNarrowphase::allocateShapeBuffer(m_internalData->m_deviceCL, MAX_CONVEX_SHAPES_CL); + + m_internalData->m_shapePointers = new adl::HostBuffer(m_internalData->m_deviceHost,MAX_CONVEX_SHAPES_CL); + + m_internalData->m_numAcceleratedShapes = 0; + m_internalData->m_numAcceleratedRigidBodies = 0; + + m_internalData->m_contactCGPU = adl::Solver::allocateConstraint4( m_internalData->m_deviceCL, MAX_BROADPHASE_COLLISION_CL); + m_internalData->m_frictionCGPU = adl::Solver::allocateFrictionConstraint( m_internalData->m_deviceCL, MAX_BROADPHASE_COLLISION_CL); + + } +} + +int btGpuNarrowphaseAndSolver::registerShape(ConvexHeightField* convexShape) +{ + (*m_internalData->m_shapePointers)[m_internalData->m_numAcceleratedShapes] = convexShape; + adl::ChNarrowphase::setShape(m_internalData->m_ShapeBuffer, convexShape, m_internalData->m_numAcceleratedShapes, 0.01f); + return m_internalData->m_numAcceleratedShapes++; +} + +cl_mem btGpuNarrowphaseAndSolver::getBodiesGpu() +{ + return (cl_mem)m_internalData->m_bodyBufferGPU->m_ptr; +} + +cl_mem btGpuNarrowphaseAndSolver::getBodyInertiasGpu() +{ + return (cl_mem)m_internalData->m_inertiaBufferGPU->m_ptr; +} + + +int btGpuNarrowphaseAndSolver::registerRigidBody(int shapeIndex, float mass, const float* position, const float* orientation , bool writeToGpu) +{ + assert(m_internalData->m_numAcceleratedRigidBodies< (MAX_CONVEX_BODIES_CL-1)); + + RigidBodyBase::Body& body = m_internalData->m_bodyBufferCPU->m_ptr[m_internalData->m_numAcceleratedRigidBodies]; + + float friction = 1.f; + float restitution = 0.f; + + body.m_frictionCoeff = friction; + body.m_restituitionCoeff = restitution; + body.m_angVel = make_float4(0.f); + body.m_linVel = make_float4(0.f); + body.m_pos = make_float4(position[0],position[1],position[2],0.f); + body.m_quat = 
make_float4(orientation[0],orientation[1],orientation[2],orientation[3]); + body.m_shapeIdx = shapeIndex; + if (shapeIndex<0) + { + body.m_shapeType = CollisionShape::SHAPE_PLANE; + m_planeBodyIndex = m_internalData->m_numAcceleratedRigidBodies; + } else + { + body.m_shapeType = CollisionShape::SHAPE_CONVEX_HEIGHT_FIELD; + } + + body.m_invMass = mass? 1.f/mass : 0.f; + + if (writeToGpu) + m_internalData->m_bodyBufferGPU->write(&body,1,m_internalData->m_numAcceleratedRigidBodies); + + RigidBodyBase::Inertia& shapeInfo = m_internalData->m_inertiaBufferCPU->m_ptr[m_internalData->m_numAcceleratedRigidBodies]; + + if (mass==0.f) + { + shapeInfo.m_initInvInertia = mtZero(); + shapeInfo.m_invInertia = mtZero(); + } else + { + + assert(body.m_shapeIdx>=0); + + //approximate using the aabb of the shape + + Aabb aabb = (*m_internalData->m_shapePointers)[shapeIndex]->m_aabb; + float4 halfExtents = (aabb.m_max - aabb.m_min); + + float4 localInertia; + + float lx=2.f*halfExtents.x; + float ly=2.f*halfExtents.y; + float lz=2.f*halfExtents.z; + + localInertia = make_float4( (mass/12.0f) * (ly*ly + lz*lz), + (mass/12.0f) * (lx*lx + lz*lz), + (mass/12.0f) * (lx*lx + ly*ly)); + + float4 invLocalInertia; + invLocalInertia.x = 1.f/localInertia.x; + invLocalInertia.y = 1.f/localInertia.y; + invLocalInertia.z = 1.f/localInertia.z; + invLocalInertia.w = 0.f; + + shapeInfo.m_initInvInertia = mtZero(); + shapeInfo.m_initInvInertia.m_row[0].x = invLocalInertia.x; + shapeInfo.m_initInvInertia.m_row[1].y = invLocalInertia.y; + shapeInfo.m_initInvInertia.m_row[2].z = invLocalInertia.z; + + Matrix3x3 m = qtGetRotationMatrix( body.m_quat); + Matrix3x3 mT = mtTranspose( m ); + shapeInfo.m_invInertia = mtMul( mtMul( m, shapeInfo.m_initInvInertia ), mT ); + + } + + if (writeToGpu) + m_internalData->m_inertiaBufferGPU->write(&shapeInfo,1,m_internalData->m_numAcceleratedRigidBodies); + return m_internalData->m_numAcceleratedRigidBodies++; +} + +void btGpuNarrowphaseAndSolver::writeAllBodiesToGpu() +{ 
+ m_internalData->m_bodyBufferGPU->write(m_internalData->m_bodyBufferCPU->m_ptr,m_internalData->m_numAcceleratedRigidBodies); + m_internalData->m_inertiaBufferGPU->write( m_internalData->m_inertiaBufferCPU->m_ptr,m_internalData->m_numAcceleratedRigidBodies); +} + + + +btGpuNarrowphaseAndSolver::~btGpuNarrowphaseAndSolver(void) +{ + if (m_internalData) + { + delete m_internalData->m_pBufPairsCPU; + delete m_internalData->m_convexPairsOutGPU; + delete m_internalData->m_planePairs; + delete m_internalData->m_pBufContactOutGPU; + delete m_internalData->m_inertiaBufferGPU; + delete m_internalData->m_pBufContactOutCPU; + delete m_internalData->m_shapePointers; + adl::ChNarrowphase::deallocateShapeBuffer(m_internalData->m_ShapeBuffer); + delete m_internalData->m_inertiaBufferCPU; + adl::Solver::deallocateConstraint4( m_internalData->m_contactCGPU ); + adl::Solver::deallocateFrictionConstraint( m_internalData->m_frictionCGPU ); + + delete m_internalData->m_bodyBufferGPU; + adl::Solver::deallocate( m_internalData->m_solverDataGPU); + delete m_internalData->m_bodyBufferCPU; + adl::ChNarrowphase::deallocate(m_internalData->m_Data); + + + + adl::DeviceUtils::deallocate(m_internalData->m_deviceHost); + + delete m_internalData; + } + +} + + + + + +void btGpuNarrowphaseAndSolver::computeContactsAndSolver(cl_mem broadphasePairs, int numBroadphasePairs) +{ + + BT_PROFILE("computeContactsAndSolver"); + bool bGPU = (m_internalData != 0); + int maxBodyIndex = m_internalData->m_numAcceleratedRigidBodies; + + if (!maxBodyIndex) + return; + int numOfConvexRBodies = maxBodyIndex; + + adl::ChNarrowphaseBase::Config cfgNP; + cfgNP.m_collisionMargin = 0.01f; + int nContactOut = 0; + //printf("convexPairsOut.m_size = %d\n",m_internalData->m_convexPairsOutGPU->m_size); + + + adl::Buffer broadphasePairsGPU; + broadphasePairsGPU.m_ptr = (int2*)broadphasePairs; + broadphasePairsGPU.m_size = numBroadphasePairs; + broadphasePairsGPU.m_device = m_internalData->m_deviceCL; + + + bool useCulling = 
true; + if (useCulling) + { + BT_PROFILE("ChNarrowphase::culling"); + adl::DeviceUtils::waitForCompletion(m_internalData->m_deviceCL); + + numPairsOut = adl::ChNarrowphase::culling( + m_internalData->m_Data, + &broadphasePairsGPU, + numBroadphasePairs, + m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, + m_internalData->m_convexPairsOutGPU, + cfgNP); + } + + { + BT_PROFILE("ChNarrowphase::execute"); + if (useCulling) + { + + if (m_planeBodyIndex>=0) + { + BT_PROFILE("ChNarrowphase:: plane versus convex"); + //todo: get rid of this dynamic allocation + int2* hostPairs = new int2[m_internalData->m_numAcceleratedRigidBodies-1]; + int index=0; + for (int i=0;im_numAcceleratedRigidBodies;i++) + { + if (i!=m_planeBodyIndex) + { + hostPairs[index].x = m_planeBodyIndex; + hostPairs[index].y = i; + index++; + } + } + assert(m_internalData->m_numAcceleratedRigidBodies-1 == index); + m_internalData->m_planePairs->write(hostPairs,index); + adl::DeviceUtils::waitForCompletion(m_internalData->m_deviceCL); + delete[]hostPairs; + //convex versus plane + adl::ChNarrowphase::execute(m_internalData->m_Data, m_internalData->m_planePairs, index, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, + 0,0,m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP); + } + + //convex versus convex + adl::ChNarrowphase::execute(m_internalData->m_Data, m_internalData->m_convexPairsOutGPU,numPairsOut, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP); + } else + { + adl::ChNarrowphase::execute(m_internalData->m_Data, &broadphasePairsGPU, numBroadphasePairs, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP); + } + + adl::DeviceUtils::waitForCompletion(m_internalData->m_deviceCL); + } + + if (!nContactOut) + return; + + + bool useSolver = true;//true;//false; + + if (useSolver) + { + float dt=1./60.; + adl::SolverBase::ConstraintCfg 
csCfg( dt ); + csCfg.m_enableParallelSolve = true; + csCfg.m_averageExtent = 0.2f;//@TODO m_averageObjExtent; + csCfg.m_staticIdx = m_planeBodyIndex; + + + bool exposeInternalBatchImplementation=true; + + adl::Solver::Data* cpuSolverData = 0; + if (exposeInternalBatchImplementation) + { + BT_PROFILE("Batching"); + + cpuSolverData = adl::Solver::allocate( m_internalData->m_deviceHost, nContactOut); + + adl::Buffer* contactsIn = m_internalData->m_pBufContactOutGPU; + const adl::Buffer* bodyBuf = m_internalData->m_bodyBufferGPU; + void* additionalData = m_internalData->m_frictionCGPU; + const adl::Buffer* shapeBuf = m_internalData->m_inertiaBufferGPU; + SolverData contactCOut = m_internalData->m_contactCGPU; + int nContacts = nContactOut; + + bool useCPU=false; + + if (useCPU) + { + BT_PROFILE("CPU batch"); + { + BT_PROFILE("CPU sortContacts2"); + sortContacts2( cpuSolverData, bodyBuf, contactsIn, additionalData, nContacts, csCfg ); + } + + CPUSolveData* dataCPU = (CPUSolveData*)cpuSolverData->m_parallelSolveData; + { + BT_PROFILE("CPU batchContacts2"); + + adl::Buffer n; n.setRawPtr( cpuSolverData->m_device, dataCPU->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + adl::Buffer offsets; offsets.setRawPtr( cpuSolverData->m_device, dataCPU->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + batchContacts2( cpuSolverData, contactsIn, nContacts, &n, &offsets, csCfg.m_staticIdx ); + } + + { + BT_PROFILE("CPU convertToConstraints2"); + convertToConstraints2( cpuSolverData, bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, csCfg ); + } + + { + BT_PROFILE("CPU -> GPU copy"); + ParallelSolveData* dataGPU = (ParallelSolveData*)m_internalData->m_solverDataGPU->m_parallelSolveData; + dataGPU->m_numConstraints->write(dataCPU->m_n,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + dataGPU->m_offsets->write(dataCPU->m_offset,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + adl::DeviceUtils::waitForCompletion( 
m_internalData->m_deviceCL); + } + + } + else + { + BT_PROFILE("GPU batch"); + + adl::Solver::Data* data = m_internalData->m_solverDataGPU; + + { + if( data->m_contactBuffer ) + { + if( data->m_contactBuffer->getSize() < nContacts ) + { + BT_PROFILE("delete data->m_contactBuffer;"); + delete data->m_contactBuffer; + data->m_contactBuffer = 0; + } + } + if( data->m_contactBuffer == 0 ) + { + data->m_contactBuffer = new adl::Buffer( data->m_device, nContacts ); + } + + adl::Buffer* contactNative = contactsIn; + + ParallelSolveData* nativeSolveData = (ParallelSolveData*)data->m_parallelSolveData; + + { + + ADLASSERT( data->m_device->m_type == adl::TYPE_CL ); + adl::Buffer* bodyNative = adl::BufferUtils::map( data->m_device, bodyBuf ); + adl::Buffer* contactNative = adl::BufferUtils::map( data->m_device, contactsIn ); + + const int sortAlignment = 512; // todo. get this out of sort + if( csCfg.m_enableParallelSolve ) + { + ParallelSolveData* nativeSolveData = (ParallelSolveData*)data->m_parallelSolveData; + + int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment ); + + adl::Buffer* countsNative = nativeSolveData->m_numConstraints;//BufferUtils::map( data->m_device, &countsHost ); + adl::Buffer* offsetsNative = nativeSolveData->m_offsets;//BufferUtils::map( data->m_device, &offsetsHost ); + + { // 2. 
set cell idx + BT_PROFILE("GPU set cell idx"); + struct CB + { + int m_nContacts; + int m_staticIdx; + float m_scale; + int m_nSplit; + }; + + ADLASSERT( sortSize%64 == 0 ); + CB cdata; + cdata.m_nContacts = nContacts; + cdata.m_staticIdx = csCfg.m_staticIdx; + cdata.m_scale = 1.f/(adl::SolverBase::N_OBJ_PER_SPLIT*csCfg.m_averageExtent); + cdata.m_nSplit = adl::SolverBase::N_SPLIT; + + adl::Buffer constBuffer( data->m_device, 1, adl::BufferBase::BUFFER_CONST ); + adl::Launcher::BufferInfo bInfo[] = { adl::Launcher::BufferInfo( contactNative ), adl::Launcher::BufferInfo( bodyNative ), adl::Launcher::BufferInfo( data->m_sortDataBuffer ) }; + adl::Launcher launcher( data->m_device, data->m_setSortDataKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(adl::Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( sortSize, 64 ); + } + bool gpuRadixSort=true; + if (gpuRadixSort) + { // 3. sort by cell idx + BT_PROFILE("gpuRadixSort"); + int n = adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; + int sortBit = 32; + //if( n <= 0xffff ) sortBit = 16; + //if( n <= 0xff ) sortBit = 8; + //adl::RadixSort::execute( data->m_sort, *data->m_sortDataBuffer, sortSize ); + adl::RadixSort32::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize ); + + } else + { + BT_PROFILE("cpu RadixSort"); + adl::HostBuffer sortData(m_internalData->m_deviceHost,nContacts); + data->m_sortDataBuffer->read(sortData.m_ptr,nContacts); + adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL); + + adl::RadixSort::Data* sData = adl::RadixSort::allocate( m_internalData->m_deviceHost, nContacts ); + adl::RadixSort::execute( sData, sortData, nContacts ); + adl::RadixSort::deallocate( sData ); + + data->m_sortDataBuffer->write(sortData.m_ptr,nContacts); + adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL); + } + + + + bool gpuBoundSearch=true; + if (gpuBoundSearch) + { // 4. 
find entries + BT_PROFILE("gpuBoundSearch"); + adl::BoundSearch::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, + adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT, adl::BoundSearchBase::COUNT ); + + adl::PrefixScan::execute( data->m_scan, *countsNative, *offsetsNative, + adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + } else + { + BT_PROFILE("cpuBoundSearch"); + adl::HostBuffer sortData(m_internalData->m_deviceHost,nContacts); + data->m_sortDataBuffer->read(sortData.m_ptr,nContacts); + adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL); + + adl::HostBuffer n0( m_internalData->m_deviceHost, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + adl::HostBuffer offset0( m_internalData->m_deviceHost, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); + for(int i=0; i=0); + assert(idxwrite(n0.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + offsetsNative->write(offset0.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + adl::DeviceUtils::waitForCompletion( data->m_device ); + + } + { // 5. 
sort constraints by cellIdx + { + BT_PROFILE("gpu m_reorderContactKernel"); + adl::Buffer constBuffer( data->m_device, 1, adl::BufferBase::BUFFER_CONST ); + + int4 cdata; cdata.x = nContacts; + adl::Launcher::BufferInfo bInfo[] = { adl::Launcher::BufferInfo( contactNative ), adl::Launcher::BufferInfo( data->m_contactBuffer ), adl::Launcher::BufferInfo( data->m_sortDataBuffer ) }; + adl::Launcher launcher( data->m_device, data->m_reorderContactKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(adl::Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( nContacts, 64 ); + } + } + + } + + adl::BufferUtils::unmap( bodyNative, bodyBuf ); + adl::BufferUtils::unmap( contactNative, contactsIn ); + + } + + adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL); + + { + BT_PROFILE("gpu m_copyConstraintKernel"); + adl::Buffer constBuffer( data->m_device, 1, adl::BufferBase::BUFFER_CONST ); + int4 cdata; cdata.x = nContacts; + adl::Launcher::BufferInfo bInfo[] = { adl::Launcher::BufferInfo( data->m_contactBuffer ), adl::Launcher::BufferInfo( contactNative ) }; + adl::Launcher launcher( data->m_device, data->m_copyConstraintKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(adl::Launcher::BufferInfo) ); + launcher.setConst( constBuffer, cdata ); + launcher.launch1D( nContacts, 64 ); + adl::DeviceUtils::waitForCompletion( data->m_device ); + } + + bool compareGPU = false; + if (gpuBatchContacts) + { + BT_PROFILE("gpu batchContacts"); + adl::Solver::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, csCfg.m_staticIdx ); + } + else + { + BT_PROFILE("cpu batchContacts2"); + cpuSolverData->m_parallelSolveData = 0;// + ParallelSolveData* dataGPU = (ParallelSolveData*)m_internalData->m_solverDataGPU->m_parallelSolveData; + adl::Buffer numConstraints(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + adl::Buffer 
offsets(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + + { + BT_PROFILE("gpu->cpu read m_numConstraints"); + dataGPU->m_numConstraints->read(numConstraints.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + dataGPU->m_offsets->read(offsets.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + adl::DeviceUtils::waitForCompletion( data->m_device ); + } + + adl::Buffer gpunumConstraints(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + adl::Buffer gpuoffsets(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + + if (compareGPU) + { + adl::Buffer contactNativeCopy (data->m_device,contactNative->getSize()); + contactNativeCopy.write(*contactNative,contactNative->getSize()); + adl::DeviceUtils::waitForCompletion( data->m_device ); + + adl::Buffer tmpNumGPU(data->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + adl::Buffer tmpOffsetGPU(data->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + tmpNumGPU.write(numConstraints.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + tmpOffsetGPU.write(offsets.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + adl::DeviceUtils::waitForCompletion( data->m_device ); + + BT_PROFILE("gpu batchContacts"); + //adl::Solver::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, csCfg.m_staticIdx ); + adl::Solver::batchContacts( data, &contactNativeCopy, nContacts, &tmpNumGPU, &tmpOffsetGPU, csCfg.m_staticIdx ); + + + adl::DeviceUtils::waitForCompletion( data->m_device ); + + //compare now + tmpNumGPU.read(gpunumConstraints,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + tmpOffsetGPU.read(gpuoffsets,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + adl::DeviceUtils::waitForCompletion( data->m_device ); + + } + + CPUSolveData* dataCPU = (CPUSolveData*)cpuSolverData->m_parallelSolveData; + + { + BT_PROFILE("cpu batchContacts2"); + 
batchContacts2( cpuSolverData, contactNative, nContacts, &numConstraints, &offsets, csCfg.m_staticIdx ); + } + + + if (compareGPU) + { + adl::DeviceUtils::waitForCompletion( data->m_device ); + dataGPU->m_numConstraints->write(numConstraints.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + dataGPU->m_offsets->write(offsets.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT); + adl::DeviceUtils::waitForCompletion( data->m_device ); + + + for (int i=0;i::convertToConstraints( data, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, csCfg ); + adl::DeviceUtils::waitForCompletion( data->m_device ); + } + if (compareGPU) + { + adl::Buffer contactNativeCPU(cpuSolverData->m_device,contactNative->getSize()); + contactNative->read(contactNativeCPU,nContacts); + adl::DeviceUtils::waitForCompletion( data->m_device ); + for (int i=0;i::reorderConvertToConstraints( + m_internalData->m_solverDataGPU, + m_internalData->m_bodyBufferGPU, + m_internalData->m_inertiaBufferGPU, + m_internalData->m_pBufContactOutGPU, + m_internalData->m_contactCGPU, + m_internalData->m_frictionCGPU, + nContactOut, + csCfg ); + adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL ); + } + + + if (1) + { + BT_PROFILE("GPU solveContactConstraint"); + m_internalData->m_solverDataGPU->m_nIterations = 5; + + adl::Solver::solveContactConstraint( m_internalData->m_solverDataGPU, + m_internalData->m_bodyBufferGPU, + m_internalData->m_inertiaBufferGPU, + m_internalData->m_contactCGPU, + 0, + nContactOut ); + + adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL ); + } + + if (cpuSolverData) + adl::Solver::deallocate( cpuSolverData ); + + if (0) + { + BT_PROFILE("read body velocities back to CPU"); + //read body updated linear/angular velocities back to CPU + m_internalData->m_bodyBufferGPU->read( + m_internalData->m_bodyBufferCPU->m_ptr,numOfConvexRBodies); + adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL ); + } + } + +} diff --git 
a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h new file mode 100644 index 000000000..9d5941334 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h @@ -0,0 +1,72 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Erwin Coumans + +#ifndef GPU_NARROWPHASE_SOLVER_H +#define GPU_NARROWPHASE_SOLVER_H + + + +//#define MAX_CONVEX_BODIES_CL 8*1024 +#define MAX_CONVEX_BODIES_CL 128*1024 +#define MAX_PAIRS_PER_BODY_CL 16 +#define MAX_CONVEX_SHAPES_CL 8192 +#define MAX_BROADPHASE_COLLISION_CL (MAX_CONVEX_BODIES_CL*MAX_PAIRS_PER_BODY_CL) + +/* +#define MAX_CONVEX_BODIES_CL 1024 +#define MAX_PAIRS_PER_BODY_CL 32 +#define MAX_CONVEX_SHAPES_CL 8192 +#define MAX_BROADPHASE_COLLISION_CL (MAX_CONVEX_BODIES_CL*MAX_PAIRS_PER_BODY_CL) +*/ + +namespace adl +{ + struct DeviceCL; +}; + + +struct CustomDispatchData; + +#include "../basic_initialize/btOpenCLInclude.h" + + +class btGpuNarrowphaseAndSolver +{ +protected: + + CustomDispatchData* m_internalData; + int m_acceleratedCompanionShapeIndex; + int m_planeBodyIndex; + +public: + btGpuNarrowphaseAndSolver(adl::DeviceCL* deviceCL); + + virtual ~btGpuNarrowphaseAndSolver(void); + + int registerShape(class ConvexHeightField* convexShape); + int registerRigidBody(int shapeIndex, float mass, const float* position, const float* orientation, bool writeToGpu = true); + void writeAllBodiesToGpu(); + + //btBroadphasePair* GetPair(btBroadphasePairArray& pairArray, int idxBodyA, int idxBodyB); + + virtual void computeContactsAndSolver(cl_mem broadphasePairs, int numBroadphasePairs); + + cl_mem getBodiesGpu(); + + cl_mem getBodyInertiasGpu(); + +}; + +#endif //GPU_NARROWPHASE_SOLVER_H diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/main.cpp b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/main.cpp new file mode 100644 index 000000000..07dfc82b2 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/main.cpp @@ -0,0 +1,1775 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + + +int NUM_OBJECTS_X = 54; +int NUM_OBJECTS_Y = 35; +int NUM_OBJECTS_Z = 54; + +float X_GAP = 12.f; +float Y_GAP = 2.f; +float Z_GAP = 2.f; + +int preferredGPU = -1; +int preferredPlatform=-1; +int USE_GL_CL_INTEROP=1; +extern int gpuBatchContacts; + + +#include +#include + +#include "btGlutInclude.h" +#include "../opengl_interop/btStopwatch.h" +#include "../../dynamics/basic_demo/ConvexHeightFieldShape.h" +#include "../../dynamics/basic_demo/Stubs/AdlRigidBody.h" + +#include "btGpuNarrowphaseAndSolver.h" + +#include "LinearMath/btQuickprof.h" + +#include "LinearMath/btVector3.h" +#include "LinearMath/btQuaternion.h" +#include "LinearMath/btMatrix3x3.h" +static float sAngle(0); + +#include + +#ifdef _WIN32 +#include +#endif + +#include +#include +#include "../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h" +#include "../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h" +#include "../broadphase_benchmark/btGridBroadphaseCl.h" +#include "btConvexUtility.h" + +#define USE_NEW +#ifdef USE_NEW +btGridBroadphaseCl* sBroadphase=0; +#else +btGpu3DGridBroadphase* sBroadphase=0; +#endif + +btAlignedObjectArray proxyArray; + +int gShapeIndex=0; + + +#define RS_SCALE (1.0 / (1.0 + RAND_MAX)) + + +int randbiased (double x) { + for (;;) { + double p = rand () * RS_SCALE; + if (p >= x) return 0; + if 
(p+RS_SCALE <= x) return 1; + /* p < x < p+RS_SCALE */ + x = (x - p) * (1.0 + RAND_MAX); + } +} + +size_t randrange (size_t n) +{ + double xhi; + double resolution = n * RS_SCALE; + double x = resolution * rand (); /* x in [0,n) */ + size_t lo = (size_t) floor (x); + + xhi = x + resolution; + + for (;;) { + lo++; + if (lo >= xhi || randbiased ((lo - x) / (xhi - x))) return lo-1; + x = lo; + } +} + +//OpenCL stuff +#include "../basic_initialize/btOpenCLUtils.h" +#include "../opengl_interop/btOpenCLGLInteropBuffer.h" + +#include "../broadphase_benchmark/findPairsOpenCL.h" + +btFindPairsIO gFpIO; + +cl_context g_cxMainContext; +cl_command_queue g_cqCommandQue; +cl_device_id g_device; + +cl_mem gLinVelMem; +cl_mem gAngVelMem; +cl_mem gBodyTimes; + +btVector3 m_cameraPosition(142,20,142); +btVector3 m_cameraTargetPosition(0,10,0); +btScalar m_cameraDistance = 55; +btVector3 m_cameraUp(0,1,0); +float m_azi=30.f; +float m_ele=5.f; + + + + +btOpenCLGLInteropBuffer* g_interopBuffer = 0; +cl_mem clBuffer=0; +char* hostPtr=0; +cl_bool blocking= CL_TRUE; + + +cl_kernel g_integrateTransformsKernel; + + + +////for Adl +#include + +adl::DeviceCL* g_deviceCL=0; + + + +bool useCPU = false; +bool printStats = true; +bool runOpenCLKernels = true; + +#define MSTRINGIFY(A) #A +static char* interopKernelString = +#include "../broadphase_benchmark/integrateKernel.cl" + +#define INTEROPKERNEL_SRC_PATH "../../opencl/broadphase_benchmark/integrateKernel.cl" + + +ConvexHeightField* s_convexHeightField = 0 ; +btGpuNarrowphaseAndSolver* narrowphaseAndSolver =0; + + +btStopwatch gStopwatch; +int m_glutScreenWidth = 640; +int m_glutScreenHeight= 480; + +bool m_ortho = false; + +static GLuint instancingShader; // The instancing renderer +static GLuint cube_vao; +static GLuint cube_vbo; +static GLuint index_vbo; +static GLuint m_texturehandle; + +static bool done = false; +static GLint angle_loc = 0; +static GLint ModelViewMatrix; +static GLint ProjectionMatrix; + +void writeTransforms(); + 
+static GLint uniform_texture_diffuse = 0; + +//used for dynamic loading from disk (default switched off) +#define MAX_SHADER_LENGTH 8192 +static GLubyte shaderText[MAX_SHADER_LENGTH]; + +static const char* vertexShader= \ +"#version 330\n" +"precision highp float;\n" +"\n" +"\n" +"\n" +"layout (location = 0) in vec4 position;\n" +"layout (location = 1) in vec4 instance_position;\n" +"layout (location = 2) in vec4 instance_quaternion;\n" +"layout (location = 3) in vec2 uvcoords;\n" +"layout (location = 4) in vec3 vertexnormal;\n" +"layout (location = 5) in vec4 instance_color;\n" +"layout (location = 6) in vec3 instance_scale;\n" +"\n" +"\n" +"uniform float angle = 0.0;\n" +"uniform mat4 ModelViewMatrix;\n" +"uniform mat4 ProjectionMatrix;\n" +"\n" +"out Fragment\n" +"{\n" +" vec4 color;\n" +"} fragment;\n" +"\n" +"out Vert\n" +"{\n" +" vec2 texcoord;\n" +"} vert;\n" +"\n" +"\n" +"vec4 quatMul ( in vec4 q1, in vec4 q2 )\n" +"{\n" +" vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n" +" vec4 dt = q1 * q2;\n" +" float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n" +" return vec4 ( im, re );\n" +"}\n" +"\n" +"vec4 quatFromAxisAngle(vec4 axis, in float angle)\n" +"{\n" +" float cah = cos(angle*0.5);\n" +" float sah = sin(angle*0.5);\n" +" float d = inversesqrt(dot(axis,axis));\n" +" vec4 q = vec4(axis.x*sah*d,axis.y*sah*d,axis.z*sah*d,cah);\n" +" return q;\n" +"}\n" +"//\n" +"// vector rotation via quaternion\n" +"//\n" +"vec4 quatRotate3 ( in vec3 p, in vec4 q )\n" +"{\n" +" vec4 temp = quatMul ( q, vec4 ( p, 0.0 ) );\n" +" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" +"}\n" +"vec4 quatRotate ( in vec4 p, in vec4 q )\n" +"{\n" +" vec4 temp = quatMul ( q, p );\n" +" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" +"}\n" +"\n" +"out vec3 lightDir,normal,ambient;\n" +"\n" +"void main(void)\n" +"{\n" +" vec4 q = instance_quaternion;\n" +" ambient = vec3(0.3,.3,0.3);\n" +" \n" +" \n" +" vec4 local_normal = (quatRotate3( 
vertexnormal,q));\n" +" vec3 light_pos = vec3(-0.8,1,-0.6);\n" +" normal = local_normal.xyz;\n"//normalize(ModelViewMatrix * local_normal).xyz;\n" +"\n" +" lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n" +"// lightDir = normalize(vec3(gl_LightSource[0].position));\n" +" \n" +" vec4 axis = vec4(1,1,1,0);\n" +" vec4 localcoord = quatRotate3( position.xyz*instance_scale,q);\n" +" vec4 vertexPos = ProjectionMatrix * ModelViewMatrix *(instance_position+localcoord);\n" +"\n" +" gl_Position = vertexPos;\n" +" \n" +" fragment.color = instance_color;\n" +" vert.texcoord = uvcoords;\n" +"}\n" +; + + +static const char* fragmentShader= \ +"#version 330\n" +"precision highp float;\n" +"\n" +"in Fragment\n" +"{\n" +" vec4 color;\n" +"} fragment;\n" +"\n" +"in Vert\n" +"{\n" +" vec2 texcoord;\n" +"} vert;\n" +"\n" +"uniform sampler2D Diffuse;\n" +"\n" +"in vec3 lightDir,normal,ambient;\n" +"\n" +"out vec4 color;\n" +"\n" +"void main_textured(void)\n" +"{\n" +" color = texture2D(Diffuse,vert.texcoord);//fragment.color;\n" +"}\n" +"\n" +"void main(void)\n" +"{\n" +" vec4 texel = fragment.color*texture2D(Diffuse,vert.texcoord);//fragment.color;\n" +" vec3 ct,cf;\n" +" float intensity,at,af;\n" +" intensity = max(dot(lightDir,normalize(normal)),.2);\n" +" cf = intensity*vec3(1.0,1.0,1.0)+ambient;" +" af = 1.0;\n" +" \n" +" ct = texel.rgb;\n" +" at = texel.a;\n" +" \n" +" color = vec4(ct * cf, at * af); \n" +"}\n" +; + + +// Load the shader from the source text +void gltLoadShaderSrc(const char *szShaderSrc, GLuint shader) +{ + GLchar *fsStringPtr[1]; + + fsStringPtr[0] = (GLchar *)szShaderSrc; + glShaderSource(shader, 1, (const GLchar **)fsStringPtr, NULL); +} + + +//////////////////////////////////////////////////////////////// +// Load the shader from the specified file. 
Returns false if the +// shader could not be loaded +bool gltLoadShaderFile(const char *szFile, GLuint shader) +{ + GLint shaderLength = 0; + FILE *fp; + + // Open the shader file + fp = fopen(szFile, "r"); + if(fp != NULL) + { + // See how long the file is + while (fgetc(fp) != EOF) + shaderLength++; + + // Allocate a block of memory to send in the shader + assert(shaderLength < MAX_SHADER_LENGTH); // make me bigger! + if(shaderLength > MAX_SHADER_LENGTH) + { + fclose(fp); + return false; + } + + // Go back to beginning of file + rewind(fp); + + // Read the whole file in + if (shaderText != NULL) + fread(shaderText, 1, shaderLength, fp); + + // Make sure it is null terminated and close the file + shaderText[shaderLength] = '\0'; + fclose(fp); + } + else + return false; + + // printf(shaderText); + // Load the string + gltLoadShaderSrc((const char *)shaderText, shader); + + return true; +} + + +///////////////////////////////////////////////////////////////// +// Load a pair of shaders, compile, and link together. Specify the complete +// file path for each shader. Note, there is no support for +// just loading say a vertex program... you have to do both. 
+GLuint gltLoadShaderPair(const char *szVertexProg, const char *szFragmentProg, bool loadFromFile) +{ + // Temporary Shader objects + GLuint hVertexShader; + GLuint hFragmentShader; + GLuint hReturn = 0; + GLint testVal; + + // Create shader objects + hVertexShader = glCreateShader(GL_VERTEX_SHADER); + hFragmentShader = glCreateShader(GL_FRAGMENT_SHADER); + + if (loadFromFile) + { + + if(gltLoadShaderFile(szVertexProg, hVertexShader) == false) + { + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + if(gltLoadShaderFile(szFragmentProg, hFragmentShader) == false) + { + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + } else + { + gltLoadShaderSrc(vertexShader, hVertexShader); + gltLoadShaderSrc(fragmentShader, hFragmentShader); + } + // Compile them + glCompileShader(hVertexShader); + glCompileShader(hFragmentShader); + + // Check for errors + glGetShaderiv(hVertexShader, GL_COMPILE_STATUS, &testVal); + if(testVal == GL_FALSE) + { + char temp[256] = ""; + glGetShaderInfoLog( hVertexShader, 256, NULL, temp); + fprintf( stderr, "Compile failed:\n%s\n", temp); + assert(0); + exit(0); + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + glGetShaderiv(hFragmentShader, GL_COMPILE_STATUS, &testVal); + if(testVal == GL_FALSE) + { + char temp[256] = ""; + glGetShaderInfoLog( hFragmentShader, 256, NULL, temp); + fprintf( stderr, "Compile failed:\n%s\n", temp); + assert(0); + exit(0); + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + // Link them - assuming it works... 
+ hReturn = glCreateProgram(); + glAttachShader(hReturn, hVertexShader); + glAttachShader(hReturn, hFragmentShader); + + glLinkProgram(hReturn); + + // These are no longer needed + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + + // Make sure link worked too + glGetProgramiv(hReturn, GL_LINK_STATUS, &testVal); + if(testVal == GL_FALSE) + { + glDeleteProgram(hReturn); + return (GLuint)NULL; + } + + return hReturn; +} + +///position xyz, unused w, normal, uv +static const GLfloat cube_vertices[] = +{ + -1.0f, -1.0f, 1.0f, 0.0f, 0,0,1, 0,0,//0 + 1.0f, -1.0f, 1.0f, 0.0f, 0,0,1, 1,0,//1 + 1.0f, 1.0f, 1.0f, 0.0f, 0,0,1, 1,1,//2 + -1.0f, 1.0f, 1.0f, 0.0f, 0,0,1, 0,1 ,//3 + + -1.0f, -1.0f, -1.0f, 1.0f, 0,0,-1, 0,0,//4 + 1.0f, -1.0f, -1.0f, 1.0f, 0,0,-1, 1,0,//5 + 1.0f, 1.0f, -1.0f, 1.0f, 0,0,-1, 1,1,//6 + -1.0f, 1.0f, -1.0f, 1.0f, 0,0,-1, 0,1,//7 + + -1.0f, -1.0f, -1.0f, 1.0f, -1,0,0, 0,0, + -1.0f, 1.0f, -1.0f, 1.0f, -1,0,0, 1,0, + -1.0f, 1.0f, 1.0f, 1.0f, -1,0,0, 1,1, + -1.0f, -1.0f, 1.0f, 1.0f, -1,0,0, 0,1, + + 1.0f, -1.0f, -1.0f, 1.0f, 1,0,0, 0,0, + 1.0f, 1.0f, -1.0f, 1.0f, 1,0,0, 1,0, + 1.0f, 1.0f, 1.0f, 1.0f, 1,0,0, 1,1, + 1.0f, -1.0f, 1.0f, 1.0f, 1,0,0, 0,1, + + -1.0f, -1.0f, -1.0f, 1.0f, 0,-1,0, 0,0, + -1.0f, -1.0f, 1.0f, 1.0f, 0,-1,0, 1,0, + 1.0f, -1.0f, 1.0f, 1.0f, 0,-1,0, 1,1, + 1.0f,-1.0f, -1.0f, 1.0f, 0,-1,0, 0,1, + + -1.0f, 1.0f, -1.0f, 1.0f, 0,1,0, 0,0, + -1.0f, 1.0f, 1.0f, 1.0f, 0,1,0, 1,0, + 1.0f, 1.0f, 1.0f, 1.0f, 0,1,0, 1,1, + 1.0f,1.0f, -1.0f, 1.0f, 0,1,0, 0,1, +}; + +static const int cube_indices[]= +{ + 0,1,2,0,2,3,//ground face + 4,5,6,4,6,7,//top face + 8,9,10,8,10,11, + 12,13,14,12,14,15, + 16,17,18,16,18,19, + 20,21,22,20,22,23 +}; + +int m_mouseOldX = -1; +int m_mouseOldY = -1; +int m_mouseButtons = 0; + + +void mouseFunc(int button, int state, int x, int y) +{ + if (state == 0) + { + m_mouseButtons |= 1<0) + { + g_device= btOpenCLUtils::getDevice(g_cxMainContext,0); + btOpenCLDeviceInfo clInfo; + 
btOpenCLUtils::getDeviceInfo(g_device,clInfo); + btOpenCLUtils::printDeviceInfo(g_device); + // create a command-queue + g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + //normally you would create and execute kernels using this command queue + + } + + +} + +int NUM_OBJECTS = NUM_OBJECTS_X*NUM_OBJECTS_Y*NUM_OBJECTS_Z; +int POSITION_BUFFER_SIZE = (NUM_OBJECTS*sizeof(float)*4); +int ORIENTATION_BUFFER_SIZE = (NUM_OBJECTS*sizeof(float)*4); +int COLOR_BUFFER_SIZE = (NUM_OBJECTS*sizeof(float)*4); +int SCALE_BUFFER_SIZE = (NUM_OBJECTS*sizeof(float)*3); + +int VBOsize =0; + + +GLfloat* instance_positions_ptr = 0; +GLfloat* instance_quaternion_ptr = 0; +GLfloat* instance_colors_ptr = 0; +GLfloat* instance_scale_ptr= 0; + + +void DeleteShaders() +{ + glDeleteVertexArrays(1, &cube_vao); + glDeleteBuffers(1,&index_vbo); + glDeleteBuffers(1,&cube_vbo); + glDeleteProgram(instancingShader); +} + + +void InitShaders() +{ + + btOverlappingPairCache* overlappingPairCache=0; + int maxObjects = btMax(256,NUM_OBJECTS); +#ifdef USE_NEW + int maxPairsSmallProxy = 32; + + sBroadphase = new btGridBroadphaseCl(overlappingPairCache,btVector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128, + g_cxMainContext ,g_device,g_cqCommandQue, g_deviceCL); +#else + sBroadphase = new btGpu3DGridBroadphase(btVector3(2.f, 2.f, 2.f), 32, 32, 32,maxObjects, maxObjects, 64, 100.f, 64); +#endif + + + +// sBroadphase = new bt3dGridBroadphaseOCL(overlappingPairCache,btVector3(10.f, 10.f, 10.f), 32, 32, 32,NUM_OBJECTS, NUM_OBJECTS, 64, 100.f, 16, +// g_cxMainContext ,g_device,g_cqCommandQue); + + + + bool loadFromFile = false; + instancingShader = gltLoadShaderPair("instancing.vs","instancing.fs", loadFromFile); + + glLinkProgram(instancingShader); + glUseProgram(instancingShader); + angle_loc = glGetUniformLocation(instancingShader, "angle"); + ModelViewMatrix = glGetUniformLocation(instancingShader, 
"ModelViewMatrix"); + ProjectionMatrix = glGetUniformLocation(instancingShader, "ProjectionMatrix"); + uniform_texture_diffuse = glGetUniformLocation(instancingShader, "Diffuse"); + + GLuint offset = 0; + + + glGenBuffers(1, &cube_vbo); + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + + instance_positions_ptr = (GLfloat*)new float[NUM_OBJECTS*4]; + instance_quaternion_ptr = (GLfloat*)new float[NUM_OBJECTS*4]; + instance_colors_ptr = (GLfloat*)new float[NUM_OBJECTS*4]; + instance_scale_ptr = (GLfloat*)new float[NUM_OBJECTS*3]; + + + + int index=0; + for (int i=0;icreateProxy(aabbMin,aabbMax,shapeType,myptr,1,1,0,0);//m_dispatcher); + proxyArray.push_back(proxy); + + instance_quaternion_ptr[index*4]=0; + instance_quaternion_ptr[index*4+1]=0; + instance_quaternion_ptr[index*4+2]=0; + instance_quaternion_ptr[index*4+3]=1; + + instance_colors_ptr[index*4]=jregisterRigidBody(gShapeIndex,mass,&instance_positions_ptr[index*4],&instance_quaternion_ptr[index*4],writeToGpu); + + index++; + } + } + } + + float posZero[4] = {0,-NUM_OBJECTS_Y/2-1,0,0}; + float ornZero[4] = {0,0,0,1}; + + //register a 'plane' + if (narrowphaseAndSolver) + narrowphaseAndSolver->registerRigidBody(-1, 0.f, posZero,ornZero,false); + + + + if (narrowphaseAndSolver) + narrowphaseAndSolver->writeAllBodiesToGpu(); + + + int size = sizeof(cube_vertices) + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE+SCALE_BUFFER_SIZE; + VBOsize = size; + + char* bla = (char*)malloc(size); + int szc = sizeof(cube_vertices); + memcpy(bla,&cube_vertices[0],szc); + memcpy(bla+sizeof(cube_vertices),instance_positions_ptr,POSITION_BUFFER_SIZE); + memcpy(bla+sizeof(cube_vertices)+POSITION_BUFFER_SIZE,instance_quaternion_ptr,ORIENTATION_BUFFER_SIZE); + memcpy(bla+sizeof(cube_vertices)+POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE,instance_colors_ptr, COLOR_BUFFER_SIZE); + memcpy(bla+sizeof(cube_vertices)+POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE,instance_scale_ptr, SCALE_BUFFER_SIZE); + + 
glBufferData(GL_ARRAY_BUFFER, size, bla, GL_DYNAMIC_DRAW);//GL_STATIC_DRAW); + + ///initialize parts of the buffer +#ifdef _USE_SUB_DATA + glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(cube_vertices)+ 16384, bla);//cube_vertices); +#endif + + char* dest= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_WRITE_ONLY);//GL_WRITE_ONLY + memcpy(dest,cube_vertices,sizeof(cube_vertices)); + //memcpy(dest+sizeof(cube_vertices),instance_colors,sizeof(instance_colors)); + glUnmapBuffer( GL_ARRAY_BUFFER); + + + + writeTransforms(); + + /* + glBufferSubData(GL_ARRAY_BUFFER, sizeof(cube_vertices) + sizeof(instance_colors), POSITION_BUFFER_SIZE, instance_positions_ptr); + glBufferSubData(GL_ARRAY_BUFFER, sizeof(cube_vertices) + sizeof(instance_colors)+POSITION_BUFFER_SIZE,ORIENTATION_BUFFER_SIZE , instance_quaternion_ptr); + */ + + glGenVertexArrays(1, &cube_vao); + glBindVertexArray(cube_vao); + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + glBindVertexArray(0); + + glGenBuffers(1, &index_vbo); + int indexBufferSize = sizeof(cube_indices); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_vbo); + + glBufferData(GL_ELEMENT_ARRAY_BUFFER, indexBufferSize, NULL, GL_STATIC_DRAW); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER,0,indexBufferSize,cube_indices); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindVertexArray(0); + +} + + + +void updateCamera() +{ + + + + btVector3 m_cameraUp(0,1,0); + int m_forwardAxis=2; + + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + + btScalar rele = m_ele * btScalar(0.01745329251994329547);// rads per deg + btScalar razi = m_azi * btScalar(0.01745329251994329547);// rads per deg + + + btQuaternion rot(m_cameraUp,razi); + + + btVector3 eyePos(0,0,0); + eyePos[m_forwardAxis] = -m_cameraDistance; + + btVector3 forward(eyePos[0],eyePos[1],eyePos[2]); + if (forward.length2() < SIMD_EPSILON) + { + forward.setValue(1.f,0.f,0.f); + } + btVector3 right = m_cameraUp.cross(forward); + btQuaternion roll(right,-rele); + + eyePos = 
btMatrix3x3(rot) * btMatrix3x3(roll) * eyePos; + + m_cameraPosition[0] = eyePos.getX(); + m_cameraPosition[1] = eyePos.getY(); + m_cameraPosition[2] = eyePos.getZ(); + m_cameraPosition += m_cameraTargetPosition; + + + float m_frustumZNear=1; + float m_frustumZFar=1000; + + if (m_glutScreenWidth == 0 && m_glutScreenHeight == 0) + return; + + float aspect; + btVector3 extents; + + if (m_glutScreenWidth > m_glutScreenHeight) + { + aspect = m_glutScreenWidth / (float)m_glutScreenHeight; + extents.setValue(aspect * 1.0f, 1.0f,0); + } else + { + aspect = m_glutScreenHeight / (float)m_glutScreenWidth; + extents.setValue(1.0f, aspect*1.f,0); + } + + + if (m_ortho) + { + // reset matrix + glLoadIdentity(); + extents *= m_cameraDistance; + btVector3 lower = m_cameraTargetPosition - extents; + btVector3 upper = m_cameraTargetPosition + extents; + glOrtho(lower.getX(), upper.getX(), lower.getY(), upper.getY(),-1000,1000); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + } else + { + if (m_glutScreenWidth > m_glutScreenHeight) + { + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + } else + { + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + } + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + gluLookAt(m_cameraPosition[0], m_cameraPosition[1], m_cameraPosition[2], + m_cameraTargetPosition[0], m_cameraTargetPosition[1], m_cameraTargetPosition[2], + m_cameraUp.getX(),m_cameraUp.getY(),m_cameraUp.getZ()); + } + +} + + + +void myinit() +{ + GLint err = glGetError(); + + // GLfloat light_ambient[] = { btScalar(0.2), btScalar(0.2), btScalar(0.2), btScalar(1.0) }; + GLfloat light_ambient[] = { btScalar(1.0), btScalar(1.2), btScalar(0.2), btScalar(1.0) }; + + GLfloat light_diffuse[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0) }; + GLfloat light_specular[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), 
btScalar(1.0 )}; + /* light_position is NOT default value */ + GLfloat light_position0[] = { btScalar(10000.0), btScalar(10000.0), btScalar(10000.0), btScalar(0.0 )}; + GLfloat light_position1[] = { btScalar(-1.0), btScalar(-10.0), btScalar(-1.0), btScalar(0.0) }; + + glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT0, GL_POSITION, light_position0); + + glLightfv(GL_LIGHT1, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT1, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT1, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT1, GL_POSITION, light_position1); + + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + glEnable(GL_LIGHT1); + + + // glShadeModel(GL_FLAT);//GL_SMOOTH); + glShadeModel(GL_SMOOTH); + + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + + glClearColor(float(0.7),float(0.7),float(0.7),float(0)); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + + + static bool m_textureenabled = true; + static bool m_textureinitialized = false; + + + if(m_textureenabled) + { + if(!m_textureinitialized) + { + glActiveTexture(GL_TEXTURE0); + + GLubyte* image=new GLubyte[256*256*3]; + for(int y=0;y<256;++y) + { + const int t=y>>5; + GLubyte* pi=image+y*256*3; + for(int x=0;x<256;++x) + { + if (x<2||y<2||x>253||y>253) + { + pi[0]=0; + pi[1]=0; + pi[2]=0; + } else + { + pi[0]=255; + pi[1]=255; + pi[2]=255; + } + + /* + const int s=x>>5; + const GLubyte b=180; + GLubyte c=b+((s+t&1)&1)*(255-b); + pi[0]=c; + pi[1]=c; + pi[2]=c; + */ + + pi+=3; + } + } + + glGenTextures(1,(GLuint*)&m_texturehandle); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + glTexEnvf(GL_TEXTURE_ENV,GL_TEXTURE_ENV_MODE,GL_MODULATE); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT); + 
glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT); + gluBuild2DMipmaps(GL_TEXTURE_2D,3,256,256,GL_RGB,GL_UNSIGNED_BYTE,image); + delete[] image; + m_textureinitialized=true; + } + // glMatrixMode(GL_TEXTURE); + // glLoadIdentity(); + // glMatrixMode(GL_MODELVIEW); + + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + + } else + { + glDisable(GL_TEXTURE_2D); + } + + glEnable(GL_COLOR_MATERIAL); + + err = glGetError(); + assert(err==GL_NO_ERROR); + + // glEnable(GL_CULL_FACE); + // glCullFace(GL_BACK); +} + +//#pragma optimize( "g", off ) + + + +void writeTransforms() +{ + + + glFlush(); + char* bla = (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY + + float* positions = (float*)(bla+sizeof(cube_vertices)); + float* orientations = (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE); + float* colors= (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE); + float* scaling= (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE); + + // positions[0]+=0.001f; + + static int offset=0; + //offset++; + + static btVector3 axis(1,0,0); + sAngle += 0.01f; + int index=0; + btQuaternion orn(axis,sAngle); + for (int i=0;igetCLBUffer(); + BT_PROFILE("clEnqueueAcquireGLObjects"); + ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + } else + { + + BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer"); + + blocking= CL_TRUE; + hostPtr= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY + if (!clBuffer) + { + clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum); + } + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + ciErrNum = clEnqueueWriteBuffer ( g_cqCommandQue, + clBuffer, + blocking, + 0, + VBOsize, + hostPtr,0,0,0 + ); + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + } + + + + oclCHECKERROR(ciErrNum, 
CL_SUCCESS); + if (runOpenCLKernels) + { + +#ifdef USE_NEW + gFpIO.m_numObjects = NUM_OBJECTS; + gFpIO.m_positionOffset = (sizeof(cube_vertices) )/4; + gFpIO.m_clObjectsBuffer = clBuffer; + gFpIO.m_dAABB = sBroadphase->m_dAABB; + + + { + BT_PROFILE("setupGpuAabbs"); + setupGpuAabbsSimple(gFpIO); + } + { + BT_PROFILE("calculateOverlappingPairs"); + sBroadphase->calculateOverlappingPairs(0, NUM_OBJECTS); + } + gFpIO.m_dAllOverlappingPairs = sBroadphase->m_dAllOverlappingPairs; + gFpIO.m_numOverlap = sBroadphase->m_numPrefixSum; + //printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap ); + if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlapgetBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); + } + if (gFpIO.m_numOverlap) + { + BT_PROFILE("computeContactsAndSolver"); + if (narrowphaseAndSolver) + narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap); + } + + { + BT_PROFILE("copyBodyVelocities"); + if (narrowphaseAndSolver) + copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); + } + } else + { + printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap); + btAssert(0); + } + +#else + +#endif + { + BT_PROFILE("integrateTransforms"); + + if (runOpenCLKernels) + { + int numObjects = NUM_OBJECTS; + int offset = (sizeof(cube_vertices) )/4; + + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset); + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects); + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer ); + + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem); + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem); + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes); + + + + + size_t workGroupSize = 64; + size_t 
numWorkItems = workGroupSize*((NUM_OBJECTS + (workGroupSize)) / workGroupSize); + + if (workGroupSize>numWorkItems) + workGroupSize=numWorkItems; + + ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + } + + + } + + if (USE_GL_CL_INTEROP) + { + BT_PROFILE("clEnqueueReleaseGLObjects"); + ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + } + else + { + BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer"); + ciErrNum = clEnqueueReadBuffer ( g_cqCommandQue, + clBuffer, + blocking, + 0, + VBOsize, + hostPtr,0,0,0); + + //clReleaseMemObject(clBuffer); + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + glUnmapBuffer( GL_ARRAY_BUFFER); + glFlush(); + } + + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + + if (runOpenCLKernels) + { + BT_PROFILE("clFinish"); + clFinish(g_cqCommandQue); + } + + + + + } +} + + +//#pragma optimize( "g", on ) + + +void RenderScene(void) +{ + BT_PROFILE("GlutDisplayFunc"); + + +#if 0 + float modelview[20]={0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9}; + // get the current modelview matrix + glGetFloatv(GL_MODELVIEW_MATRIX , modelview); + float projection[20]={0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9}; + glGetFloatv(GL_PROJECTION_MATRIX, projection); +#endif + + myinit(); + + updateCamera(); + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + //render coordinate system + glBegin(GL_LINES); + glColor3f(1,0,0); + glVertex3f(0,0,0); + glVertex3f(1,0,0); + glColor3f(0,1,0); + glVertex3f(0,0,0); + glVertex3f(0,1,0); + glColor3f(0,0,1); + glVertex3f(0,0,0); + glVertex3f(0,0,1); + glEnd(); + + //do a finish, to make sure timings are clean + // glFinish(); + + float start = gStopwatch.getTimeMilliseconds(); + + // glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + glFlush(); + + //updatePos(); + + 
simulationLoop(); + + //useCPU = true; + + float stop = gStopwatch.getTimeMilliseconds(); + gStopwatch.reset(); + + if (0)//printStats) + { + printf("updatePos=%f ms on ",stop-start); + + if (useCPU) + { + printf("CPU \n"); + } else + { + printf("OpenCL "); + if (runOpenCLKernels) + printf("running the kernels"); + else + printf("without running the kernels"); + printf("\n"); + } + } + + glBindVertexArray(cube_vao); + + glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 9*sizeof(float), 0); + glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices))); + glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices)+POSITION_BUFFER_SIZE)); + int uvoffset = 7*sizeof(float); + int normaloffset = 4*sizeof(float); + + glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)uvoffset); + glVertexAttribPointer(4, 3, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)normaloffset); + glVertexAttribPointer(5, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices)+POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE)); + glVertexAttribPointer(6, 3, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices)+POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE)); + + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glEnableVertexAttribArray(4); + glEnableVertexAttribArray(5); + glEnableVertexAttribArray(6); + + glVertexAttribDivisor(0, 0); + glVertexAttribDivisor(1, 1); + glVertexAttribDivisor(2, 1); + glVertexAttribDivisor(3, 0); + glVertexAttribDivisor(4, 0); + glVertexAttribDivisor(5, 1); + glVertexAttribDivisor(6, 1); + + glUseProgram(instancingShader); + glUniform1f(angle_loc, 0); + GLfloat pm[16]; + glGetFloatv(GL_PROJECTION_MATRIX, pm); + glUniformMatrix4fv(ProjectionMatrix, 1, false, &pm[0]); + + GLfloat mvm[16]; + glGetFloatv(GL_MODELVIEW_MATRIX, mvm); + glUniformMatrix4fv(ModelViewMatrix, 1, false, &mvm[0]); + + 
glUniform1i(uniform_texture_diffuse, 0); + + glFlush(); + int numInstances = NUM_OBJECTS; + int indexCount = sizeof(cube_indices)/sizeof(int); + int indexOffset = 0; + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_vbo); + { + BT_PROFILE("glDrawElementsInstanced"); + glDrawElementsInstanced(GL_TRIANGLES, indexCount, GL_UNSIGNED_INT, (void*)indexOffset, numInstances); + } + glUseProgram(0); + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindVertexArray(0); + + glutSwapBuffers(); + glutPostRedisplay(); + + GLint err = glGetError(); + assert(err==GL_NO_ERROR); +} + +extern int numPairsOut; +void mainloop(void) +{ + CProfileManager::Reset(); + RenderScene(); + CProfileManager::Increment_Frame_Counter(); + + if (printStats && runOpenCLKernels) + { + static int count = 10; + count--; + if (count<0) + { + CProfileManager::dumpAll(); + printf("total broadphase pairs= %d\n", gFpIO.m_numOverlap); + printf("numPairsOut (culled) = %d\n", numPairsOut); + + printStats = false; + } + } +} + + +void ChangeSize(int w, int h) +{ + m_glutScreenWidth = w; + m_glutScreenHeight = h; + +#ifdef RECREATE_CL_AND_SHADERS_ON_RESIZE + delete g_interopBuffer; + clReleaseKernel(g_integrateTransformsKernel); + releaseFindPairs(fpio); + DeleteCL(); + DeleteShaders(); +#endif //RECREATE_CL_AND_SHADERS_ON_RESIZE + + // Set Viewport to window dimensions + glViewport(0, 0, w, h); + +#ifdef RECREATE_CL_AND_SHADERS_ON_RESIZE + InitCL(); + InitShaders(); + + g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo); + clFinish(g_cqCommandQue); + g_integrateTransformsKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, interopKernelString, "interopKernel" ); + initFindPairs(...); +#endif //RECREATE_CL_AND_SHADERS_ON_RESIZE + +} + +void Keyboard(unsigned char key, int x, int y) +{ + switch (key) + { + case 27: + done = true; + break; + case 'O': + case 'o': + { + m_ortho = !m_ortho; + break; + } + case 'c': + case 'C': + { + useCPU = !useCPU; + if (useCPU) + printf("using 
CPU\n"); + else + printf("using OpenCL\n"); + break; + } + case 's': + case 'S': + { + printStats = !printStats; + break; + } + case 'k': + case 'K': + { + runOpenCLKernels=!runOpenCLKernels; + break; + } + case 'q': + case 'Q': + exit(0); + default: + break; + } +} + +// Cleanup +void ShutdownRC(void) +{ + glDeleteBuffers(1, &cube_vbo); + glDeleteVertexArrays(1, &cube_vao); +} + +#include "CommandlineArgs.h" + +void Usage() +{ + printf("\nprogram.exe [--preferred_gpu=] [--batch_gpu=<0,1>] [--preferred_platform=] [--enable_interop=<0 or 1>] [--x_dim=] [--y_dim=] [--z_dim=] [--x_gap=] [--y_gap=] [--z_gap=]\n"); + printf("\n"); + printf("preferred_gpu : the index used for OpenCL, in case multiple OpenCL-capable GPU are available. This is ignored if interop is enabled"); + printf("preferred_platform : the platform index used for OpenCL, in case multiple OpenCL-capable platforms are available. This is ignored if interop is enabled"); + printf("enable_interop : Use OpenGL/OpenCL interoperability, avoiding memory copy between GPU and main memory"); + printf("batch_gpu : Use GPU to created solver batches. 
Set to zero to disable to improve compatibility with many GPUs"); + + +} + +int main(int argc, char* argv[]) +{ + + CommandLineArgs args(argc,argv); + + if (args.CheckCmdLineFlag("help")) + { + Usage(); + return 0; + } + + args.GetCmdLineArgument("x_dim", NUM_OBJECTS_X); + args.GetCmdLineArgument("y_dim", NUM_OBJECTS_Y); + args.GetCmdLineArgument("z_dim", NUM_OBJECTS_Z); + + args.GetCmdLineArgument("x_gap", X_GAP); + args.GetCmdLineArgument("y_gap", Y_GAP); + args.GetCmdLineArgument("z_gap", Z_GAP); + + args.GetCmdLineArgument("enable_interop", USE_GL_CL_INTEROP); + args.GetCmdLineArgument("preferred_gpu", preferredGPU); + args.GetCmdLineArgument("preferred_platform", preferredPlatform); + args.GetCmdLineArgument("batch_gpu", gpuBatchContacts); + + + + + printf("Dimensions (%d,%d,%d) with gap (%f,%f,%f), using interop=%d, gpu %d, cl platform %d, gpuBatchContacts %d \n",NUM_OBJECTS_X,NUM_OBJECTS_Y,NUM_OBJECTS_Z,X_GAP,Y_GAP,Z_GAP,USE_GL_CL_INTEROP,preferredGPU, preferredPlatform,gpuBatchContacts); + + + + + { + NUM_OBJECTS = NUM_OBJECTS_X*NUM_OBJECTS_Y*NUM_OBJECTS_Z; + POSITION_BUFFER_SIZE = (NUM_OBJECTS*sizeof(float)*4); + ORIENTATION_BUFFER_SIZE = (NUM_OBJECTS*sizeof(float)*4); + COLOR_BUFFER_SIZE = (NUM_OBJECTS*sizeof(float)*4); + SCALE_BUFFER_SIZE = (NUM_OBJECTS*sizeof(float)*3); + } + + srand(0); + // printf("vertexShader = \n%s\n",vertexShader); + // printf("fragmentShader = \n%s\n",fragmentShader); + + glutInit(&argc, argv); + + glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); + + + glutInitWindowSize(m_glutScreenWidth, m_glutScreenHeight); + char buf[1024]; + if (USE_GL_CL_INTEROP) + { + sprintf(buf,"GPU rigid body pipeline using OpenCL - OpenGL interop, simulates %d cubes on the GPU (use c to toggle CPU/CL)", NUM_OBJECTS); + } else + { + sprintf(buf,"GPU rigid body pipeline, simulates %d cubes on the GPU (use c to toggle CPU/CL)", NUM_OBJECTS); + } + + glutCreateWindow(buf); + + glutReshapeFunc(ChangeSize); + + glutMouseFunc(mouseFunc); + 
glutMotionFunc(mouseMotionFunc); + + glutKeyboardFunc(Keyboard); + glutDisplayFunc(mainloop); + + GLenum err = glewInit(); + if (GLEW_OK != err) + { + /* Problem: glewInit failed, something is seriously wrong. */ + fprintf(stderr, "Error: %s\n", glewGetErrorString(err)); + } + + //ChangeSize(m_glutScreenWidth,m_glutScreenHeight); + + + InitCL(preferredGPU, preferredPlatform); + + +#define CUSTOM_CL_INITIALIZATION +#ifdef CUSTOM_CL_INITIALIZATION + g_deviceCL = new adl::DeviceCL(); + g_deviceCL->m_deviceIdx = g_device; + g_deviceCL->m_context = g_cxMainContext; + g_deviceCL->m_commandQueue = g_cqCommandQue; + g_deviceCL->m_kernelManager = new adl::KernelManager; + +#else + DeviceUtils::Config cfg; + cfg.m_type = DeviceUtils::Config::DEVICE_CPU; + g_deviceCL = DeviceUtils::allocate( TYPE_CL, cfg ); +#endif + + int size = NUM_OBJECTS; + adl::Buffer linvelBuf( g_deviceCL, size ); + adl::Buffer angvelBuf( g_deviceCL, size ); + adl::Buffer bodyTimes(g_deviceCL,size); + + gLinVelMem = (cl_mem)linvelBuf.m_ptr; + gAngVelMem = (cl_mem)angvelBuf.m_ptr; + gBodyTimes = (cl_mem)bodyTimes.m_ptr; + + btVector3* linVelHost= new btVector3[size]; + btVector3* angVelHost = new btVector3[size]; + float* bodyTimesHost = new float[size]; + + { + int index=0; + for (int i=0;i verts; + int numVertices = (sizeof(cube_vertices) )/(9*sizeof(GLfloat)); + + for (int i=0;iregisterShape(s_convexHeightField); + + InitShaders(); + + if (USE_GL_CL_INTEROP) + { + g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo); + clFinish(g_cqCommandQue); + } + + + cl_program prog = btOpenCLUtils::compileCLProgramFromString(g_cxMainContext,g_device,interopKernelString,0,"",INTEROPKERNEL_SRC_PATH); + g_integrateTransformsKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,interopKernelString, "integrateTransformsKernel" ,0,prog); + + + initFindPairs(gFpIO, g_cxMainContext, g_device, g_cqCommandQue, NUM_OBJECTS); + + + + + + glutMainLoop(); + ShutdownRC(); 
+ + return 0; +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/premake4.lua new file mode 100644 index 000000000..d2fbbc877 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/premake4.lua @@ -0,0 +1,5 @@ + + include "AMD" +-- include "Intel" + include "NVIDIA" + \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/AMD/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/AMD/premake4.lua new file mode 100644 index 000000000..8abbebb18 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/AMD/premake4.lua @@ -0,0 +1,64 @@ + + hasCL = findOpenCL_AMD() + + if (hasCL) then + + project "OpenCL_gpu_rigidbody_pipeline2_AMD" + + initOpenCL_AMD() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + + + initOpenGL() + initGlew() + + includedirs { + "../../primitives", + "../../../../../src" + } + + files { + "../main.cpp", + "../CLPhysicsDemo.cpp", + "../CLPhysicsDemo.h", + "../GLInstancingRenderer.cpp", + "../GLInstancingRenderer.h", + "../GlutRenderer.cpp", + "../GlutRenderer.h", + "../Win32OpenGLRenderManager.cpp", + "../Win32OpenGLRenderManager.h", + "../../gpu_rigidbody_pipeline/btConvexUtility.cpp", + "../../gpu_rigidbody_pipeline/btConvexUtility.h", + "../../gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.cpp", + "../../gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h", + "../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp", + "../../../dynamics/basic_demo/ConvexHeightFieldShape.h", + "../../../../../src/LinearMath/btConvexHullComputer.cpp", + "../../../../../src/LinearMath/btConvexHullComputer.h", + "../../broadphase_benchmark/findPairsOpenCL.cpp", + "../../broadphase_benchmark/findPairsOpenCL.h", + "../../broadphase_benchmark/btGridBroadphaseCL.cpp", + "../../broadphase_benchmark/btGridBroadphaseCL.h", + 
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp", + "../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h", + "../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp", + "../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h", + "../../../../../src/LinearMath/btAlignedAllocator.cpp", + "../../../../../src/LinearMath/btQuickprof.cpp", + "../../../../../src/LinearMath/btQuickprof.h", + "../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp", + "../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp", + "../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp", + "../../basic_initialize/btOpenCLUtils.cpp", + "../../basic_initialize/btOpenCLUtils.h", + "../../opengl_interop/btOpenCLGLInteropBuffer.cpp", + "../../opengl_interop/btOpenCLGLInteropBuffer.h", + "../../opengl_interop/btStopwatch.cpp", + "../../opengl_interop/btStopwatch.h" + } + + end diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/CLPhysicsDemo.cpp b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/CLPhysicsDemo.cpp new file mode 100644 index 000000000..363f57525 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/CLPhysicsDemo.cpp @@ -0,0 +1,529 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#include "OpenGLInclude.h" + +#include "CLPhysicsDemo.h" +#include "LinearMath/btAlignedObjectArray.h" +#include "DemoSettings.h" +#include "../basic_initialize/btOpenCLUtils.h" +#include "../opengl_interop/btOpenCLGLInteropBuffer.h" +#include "../broadphase_benchmark/findPairsOpenCL.h" +#include "LinearMath/btVector3.h" +#include "LinearMath/btQuaternion.h" +#include "LinearMath/btMatrix3x3.h" +#include "../../opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h" +#include "../../opencl/gpu_rigidbody_pipeline/btConvexUtility.h" +#include "../../dynamics/basic_demo/ConvexHeightFieldShape.h" +#include "../broadphase_benchmark/btGridBroadphaseCl.h" +#include "LinearMath/btQuickprof.h" + + +#define MSTRINGIFY(A) #A +static char* interopKernelString = +#include "../broadphase_benchmark/integrateKernel.cl" + +#define INTEROPKERNEL_SRC_PATH "../../opencl/broadphase_benchmark/integrateKernel.cl" + +cl_kernel g_integrateTransformsKernel; + + + +bool runOpenCLKernels = true; + + +btGpuNarrowphaseAndSolver* narrowphaseAndSolver = 0; +ConvexHeightField* s_convexHeightField = 0 ; +btOpenCLGLInteropBuffer* g_interopBuffer = 0; + +extern GLuint cube_vbo; +extern int VBOsize; + +cl_mem clBuffer=0; +char* hostPtr=0; +cl_bool blocking= CL_TRUE; + + + +btFindPairsIO gFpIO; + +cl_context g_cxMainContext; +cl_command_queue g_cqCommandQue; +cl_device_id g_device; + +cl_mem gLinVelMem=0; +cl_mem gAngVelMem=0; +cl_mem gBodyTimes=0; + +#include + +adl::DeviceCL* g_deviceCL=0; + +struct btAABBHost //keep this in sync with btAABBCL! 
+{ + float fx; + float fy; + float fz; + unsigned int uw; +}; + +struct InternalData +{ + adl::Buffer* m_linVelBuf; + adl::Buffer* m_angVelBuf; + adl::Buffer* m_bodyTimes; + bool m_useInterop; + btGridBroadphaseCl* m_Broadphase; + + adl::Buffer* m_localShapeAABB; + + btVector3* m_linVelHost; + btVector3* m_angVelHost; + float* m_bodyTimesHost; + + InternalData():m_linVelBuf(0),m_angVelBuf(0),m_bodyTimes(0),m_useInterop(0),m_Broadphase(0) + { + m_linVelHost= new btVector3[MAX_CONVEX_BODIES_CL]; + m_angVelHost = new btVector3[MAX_CONVEX_BODIES_CL]; + m_bodyTimesHost = new float[MAX_CONVEX_BODIES_CL]; + } + ~InternalData() + { + delete[] m_linVelHost; + delete[] m_angVelHost; + delete[] m_bodyTimesHost; + + } +}; + + +void InitCL(int preferredDeviceIndex, int preferredPlatformIndex, bool useInterop) +{ + void* glCtx=0; + void* glDC = 0; + +#ifdef _WIN32 + glCtx = wglGetCurrentContext(); +#else //!_WIN32 + GLXContext glCtx = glXGetCurrentContext(); +#endif //!_WIN32 + glDC = wglGetCurrentDC(); + + int ciErrNum = 0; +#ifdef CL_PLATFORM_INTEL + cl_device_type deviceType = CL_DEVICE_TYPE_ALL; +#else + cl_device_type deviceType = CL_DEVICE_TYPE_GPU; +#endif + + + + if (useInterop) + { + g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); + } else + { + g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex); + } + + + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext); + + if (numDev>0) + { + g_device= btOpenCLUtils::getDevice(g_cxMainContext,0); + btOpenCLDeviceInfo clInfo; + btOpenCLUtils::getDeviceInfo(g_device,clInfo); + btOpenCLUtils::printDeviceInfo(g_device); + g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + +} + + + + +CLPhysicsDemo::CLPhysicsDemo(Win32OpenGLWindow* renderer) +{ + m_numCollisionShapes=0; + m_numPhysicsInstances=0; 
+ + m_data = new InternalData; +} + +CLPhysicsDemo::~CLPhysicsDemo() +{ + +} + + +void CLPhysicsDemo::writeBodiesToGpu() +{ + if (narrowphaseAndSolver) + narrowphaseAndSolver->writeAllBodiesToGpu(); +} + +int CLPhysicsDemo::registerCollisionShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling) +{ + btAlignedObjectArray verts; + + unsigned char* vts = (unsigned char*) vertices; + for (int i=0;iregisterShape(s_convexHeightField); + + if (shapeIndex>=0) + { + btAABBHost aabbMin, aabbMax; + aabbMin.fx = s_convexHeightField->m_aabb.m_min.x; + aabbMin.fy = s_convexHeightField->m_aabb.m_min.y; + aabbMin.fz= s_convexHeightField->m_aabb.m_min.z; + aabbMin.uw = shapeIndex; + + aabbMax.fx = s_convexHeightField->m_aabb.m_max.x; + aabbMax.fy = s_convexHeightField->m_aabb.m_max.y; + aabbMax.fz= s_convexHeightField->m_aabb.m_max.z; + aabbMax.uw = shapeIndex; + + m_data->m_localShapeAABB->write(&aabbMin,1,shapeIndex*2); + m_data->m_localShapeAABB->write(&aabbMax,1,shapeIndex*2+1); + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + } + + m_numCollisionShapes++; + delete[] eqn; + return shapeIndex; +} + +int CLPhysicsDemo::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, void* userPointer) +{ + btVector3 aabbMin(position[0],position[0],position[0]); + btVector3 aabbMax = aabbMin; + aabbMin -= btVector3(1.f,1.f,1.f); + aabbMax += btVector3(1.f,1.f,1.f); + + if (collisionShapeIndex>=0) + { + btBroadphaseProxy* proxy = m_data->m_Broadphase->createProxy(aabbMin,aabbMax,collisionShapeIndex,userPointer,1,1,0,0);//m_dispatcher); + } + + bool writeToGpu = false; + int bodyIndex = -1; + + if (narrowphaseAndSolver) + bodyIndex = narrowphaseAndSolver->registerRigidBody(collisionShapeIndex,mass,position,orientation,writeToGpu); + + m_numPhysicsInstances++; + return bodyIndex; +} + + + +void CLPhysicsDemo::init(int preferredDevice, int preferredPlatform, bool useInterop) +{ + + 
InitCL(-1,-1,useInterop); + +#define CUSTOM_CL_INITIALIZATION +#ifdef CUSTOM_CL_INITIALIZATION + g_deviceCL = new adl::DeviceCL(); + g_deviceCL->m_deviceIdx = g_device; + g_deviceCL->m_context = g_cxMainContext; + g_deviceCL->m_commandQueue = g_cqCommandQue; + g_deviceCL->m_kernelManager = new adl::KernelManager; + +#else + DeviceUtils::Config cfg; + cfg.m_type = DeviceUtils::Config::DEVICE_CPU; + g_deviceCL = DeviceUtils::allocate( TYPE_CL, cfg ); +#endif + + //adl::Solver::allocate(g_deviceCL->allocate( + m_data->m_linVelBuf = new adl::Buffer(g_deviceCL,MAX_CONVEX_BODIES_CL); + m_data->m_angVelBuf = new adl::Buffer(g_deviceCL,MAX_CONVEX_BODIES_CL); + m_data->m_bodyTimes = new adl::Buffer(g_deviceCL,MAX_CONVEX_BODIES_CL); + + m_data->m_localShapeAABB = new adl::Buffer(g_deviceCL,MAX_CONVEX_SHAPES_CL); + + gLinVelMem = (cl_mem)m_data->m_linVelBuf->m_ptr; + gAngVelMem = (cl_mem)m_data->m_angVelBuf->m_ptr; + gBodyTimes = (cl_mem)m_data->m_bodyTimes->m_ptr; + + + + + narrowphaseAndSolver = new btGpuNarrowphaseAndSolver(g_deviceCL); + + + + int maxObjects = btMax(256,MAX_CONVEX_BODIES_CL); + int maxPairsSmallProxy = 32; + btOverlappingPairCache* overlappingPairCache=0; + + m_data->m_Broadphase = new btGridBroadphaseCl(overlappingPairCache,btVector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128, + g_cxMainContext ,g_device,g_cqCommandQue, g_deviceCL); + + + + cl_program prog = btOpenCLUtils::compileCLProgramFromString(g_cxMainContext,g_device,interopKernelString,0,"",INTEROPKERNEL_SRC_PATH); + g_integrateTransformsKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,interopKernelString, "integrateTransformsKernel" ,0,prog); + + + initFindPairs(gFpIO, g_cxMainContext, g_device, g_cqCommandQue, MAX_CONVEX_BODIES_CL); + + + + +} + + + +void CLPhysicsDemo::writeVelocitiesToGpu() +{ + m_data->m_linVelBuf->write(m_data->m_linVelHost,MAX_CONVEX_BODIES_CL); + 
m_data->m_angVelBuf->write(m_data->m_angVelHost,MAX_CONVEX_BODIES_CL); + m_data->m_bodyTimes->write(m_data->m_bodyTimesHost,MAX_CONVEX_BODIES_CL); + adl::DeviceUtils::waitForCompletion( g_deviceCL ); +} + + +void CLPhysicsDemo::setupInterop() +{ + m_data->m_useInterop = true; + + g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo); + clFinish(g_cqCommandQue); +} + +void CLPhysicsDemo::cleanup() +{ + delete narrowphaseAndSolver; + + delete m_data->m_linVelBuf; + delete m_data->m_angVelBuf; + delete m_data->m_bodyTimes; + delete m_data->m_localShapeAABB; + + delete m_data->m_Broadphase; + delete m_data; + + delete g_deviceCL->m_kernelManager; + delete g_deviceCL; + + m_data=0; + g_deviceCL=0; + delete g_interopBuffer; + delete s_convexHeightField; +} + + + + + +void CLPhysicsDemo::stepSimulation() +{ + BT_PROFILE("simulationLoop"); + + { + BT_PROFILE("glFinish"); + glFinish(); + } + cl_int ciErrNum = CL_SUCCESS; + + + if(m_data->m_useInterop) + { + clBuffer = g_interopBuffer->getCLBUffer(); + BT_PROFILE("clEnqueueAcquireGLObjects"); + ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + } else + { + + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + glFlush(); + + BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer"); + + blocking= CL_TRUE; + hostPtr= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY + if (!clBuffer) + { + clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum); + } + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + ciErrNum = clEnqueueWriteBuffer ( g_cqCommandQue, + clBuffer, + blocking, + 0, + VBOsize, + hostPtr,0,0,0 + ); + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + } + + + + oclCHECKERROR(ciErrNum, CL_SUCCESS); + if (runOpenCLKernels && m_numPhysicsInstances) + { + + gFpIO.m_numObjects = m_numPhysicsInstances; + gFpIO.m_positionOffset = 
SHAPE_VERTEX_BUFFER_SIZE/4; + gFpIO.m_clObjectsBuffer = clBuffer; + gFpIO.m_dAABB = m_data->m_Broadphase->m_dAABB; + gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABB->m_ptr; + gFpIO.m_numOverlap = 0; + { + BT_PROFILE("setupGpuAabbs"); + setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu() ); + } + if (1) + { + BT_PROFILE("calculateOverlappingPairs"); + m_data->m_Broadphase->calculateOverlappingPairs(0, m_numPhysicsInstances); + gFpIO.m_dAllOverlappingPairs = m_data->m_Broadphase->m_dAllOverlappingPairs; + gFpIO.m_numOverlap = m_data->m_Broadphase->m_numPrefixSum; + } + + //printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap ); + if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlapgetBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); + } + if (gFpIO.m_numOverlap) + { + BT_PROFILE("computeContactsAndSolver"); + if (narrowphaseAndSolver) + narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap); + } + + { + BT_PROFILE("copyBodyVelocities"); + if (narrowphaseAndSolver) + copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); + } + } + + } else + { + printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap); + btAssert(0); + } + + + { + BT_PROFILE("integrateTransforms"); + + if (runOpenCLKernels) + { + int numObjects = m_numPhysicsInstances; + int offset = SHAPE_VERTEX_BUFFER_SIZE/4; + + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset); + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects); + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer ); + + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem); + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem); + ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), 
(void*)&gBodyTimes); + + + + + size_t workGroupSize = 64; + size_t numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize); + + if (workGroupSize>numWorkItems) + workGroupSize=numWorkItems; + + ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + } + + + } + + if(m_data->m_useInterop) + { + BT_PROFILE("clEnqueueReleaseGLObjects"); + ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + } + else + { + BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer"); + ciErrNum = clEnqueueReadBuffer ( g_cqCommandQue, + clBuffer, + blocking, + 0, + VBOsize, + hostPtr,0,0,0); + + //clReleaseMemObject(clBuffer); + adl::DeviceUtils::waitForCompletion( g_deviceCL ); + glUnmapBuffer( GL_ARRAY_BUFFER); + glFlush(); + } + + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + + if (runOpenCLKernels) + { + BT_PROFILE("clFinish"); + clFinish(g_cqCommandQue); + } + + +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/CLPhysicsDemo.h b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/CLPhysicsDemo.h new file mode 100644 index 000000000..0ed2e7392 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/CLPhysicsDemo.h @@ -0,0 +1,53 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#ifndef CL_PHYSICS_DEMO_H +#define CL_PHYSICS_DEMO_H + +class Win32OpenGLWindow; + +struct CLPhysicsDemo +{ + Win32OpenGLWindow* m_renderer; + + int m_numCollisionShapes; + + int m_numPhysicsInstances; + + struct InternalData* m_data; + + CLPhysicsDemo(Win32OpenGLWindow* renderer); + + virtual ~CLPhysicsDemo(); + + //btOpenCLGLInteropBuffer* m_interopBuffer; + + void init(int preferredDevice, int preferredPlatform, bool useInterop); + + void setupInterop(); + + int registerCollisionShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling); + + int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, void* userPointer); + + void writeVelocitiesToGpu(); + void writeBodiesToGpu(); + + void cleanup(); + + void stepSimulation(); +}; + +#endif//CL_PHYSICS_DEMO_H \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/DemoSettings.h b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/DemoSettings.h new file mode 100644 index 000000000..5b55e001d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/DemoSettings.h @@ -0,0 +1,24 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#ifndef DEMO_SETTINGS_H +#define DEMO_SETTINGS_H + +#define SHAPE_VERTEX_BUFFER_SIZE 1024*1024 + +#define SHAPE_BUFFER_SIZE (SHAPE_VERTEX_BUFFER_SIZE) + + +#endif //DEMO_SETTINGS_H \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GLInstancingRenderer.cpp b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GLInstancingRenderer.cpp new file mode 100644 index 000000000..9e7525362 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GLInstancingRenderer.cpp @@ -0,0 +1,861 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#include "OpenGLInclude.h" +#include "GLInstancingRenderer.h" + +#include +#include "DemoSettings.h" +#include +#include +#include "LinearMath/btVector3.h" +#include "LinearMath/btQuaternion.h" +#include "LinearMath/btQuickprof.h" +#include "LinearMath/btMatrix3x3.h" + +#include "../../opencl/gpu_rigidbody_pipeline/btGpuNarrowphaseAndSolver.h"//for MAX_CONVEX_BODIES_CL + +struct btGraphicsInstance +{ + GLuint m_cube_vao; + GLuint m_index_vbo; + int m_numIndices; + int m_numVertices; + + int m_numGraphicsInstances; + + int m_instanceOffset; + int m_vertexArrayOffset; + + btGraphicsInstance() :m_cube_vao(-1),m_index_vbo(-1),m_numIndices(-1),m_numVertices(-1),m_numGraphicsInstances(0),m_instanceOffset(0),m_vertexArrayOffset(0) + { + } + +}; + + + +bool m_ortho = false; +int m_glutScreenWidth = 1024; +int m_glutScreenHeight = 768; + + + +extern int gShapeIndex; + + +btVector3 m_cameraPosition(0,0,0);//will be overridden by a position computed from azi/ele +btVector3 m_cameraTargetPosition(30,-5,-20); +btScalar m_cameraDistance = 95; +btVector3 m_cameraUp(0,1,0); +float m_azi=95.f; +float m_ele=15.f; + + + + +int VBOsize =0; + + + +struct InternalDataRenderer +{ + GLfloat* m_instance_positions_ptr; + GLfloat* m_instance_quaternion_ptr; + GLfloat* m_instance_colors_ptr; + GLfloat* m_instance_scale_ptr; + + InternalDataRenderer() :m_instance_positions_ptr (0),m_instance_quaternion_ptr(0),m_instance_colors_ptr(0),m_instance_scale_ptr(0) + { + } + +}; + +static GLuint instancingShader; // The instancing renderer + +GLuint cube_vbo; + +static GLuint m_texturehandle; + +static bool done = false; +static GLint angle_loc = 0; +static GLint ModelViewMatrix; +static GLint ProjectionMatrix; + + + +GLInstancingRenderer::GLInstancingRenderer() +{ 
+ + m_data = new InternalDataRenderer; + + m_data->m_instance_positions_ptr = (GLfloat*)new float[MAX_CONVEX_BODIES_CL*4]; + m_data->m_instance_quaternion_ptr = (GLfloat*)new float[MAX_CONVEX_BODIES_CL*4]; + m_data->m_instance_colors_ptr = (GLfloat*)new float[MAX_CONVEX_BODIES_CL*4]; + m_data->m_instance_scale_ptr = (GLfloat*)new float[MAX_CONVEX_BODIES_CL*3]; + +} + +GLInstancingRenderer::~GLInstancingRenderer() +{ + delete m_data; +} + + +static GLint uniform_texture_diffuse = 0; + +//used for dynamic loading from disk (default switched off) +#define MAX_SHADER_LENGTH 8192 +static GLubyte shaderText[MAX_SHADER_LENGTH]; + +static const char* vertexShader= \ +"#version 330\n" +"precision highp float;\n" +"\n" +"\n" +"\n" +"layout (location = 0) in vec4 position;\n" +"layout (location = 1) in vec4 instance_position;\n" +"layout (location = 2) in vec4 instance_quaternion;\n" +"layout (location = 3) in vec2 uvcoords;\n" +"layout (location = 4) in vec3 vertexnormal;\n" +"layout (location = 5) in vec4 instance_color;\n" +"layout (location = 6) in vec3 instance_scale;\n" +"\n" +"\n" +"uniform float angle = 0.0;\n" +"uniform mat4 ModelViewMatrix;\n" +"uniform mat4 ProjectionMatrix;\n" +"\n" +"out Fragment\n" +"{\n" +" vec4 color;\n" +"} fragment;\n" +"\n" +"out Vert\n" +"{\n" +" vec2 texcoord;\n" +"} vert;\n" +"\n" +"\n" +"vec4 quatMul ( in vec4 q1, in vec4 q2 )\n" +"{\n" +" vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n" +" vec4 dt = q1 * q2;\n" +" float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n" +" return vec4 ( im, re );\n" +"}\n" +"\n" +"vec4 quatFromAxisAngle(vec4 axis, in float angle)\n" +"{\n" +" float cah = cos(angle*0.5);\n" +" float sah = sin(angle*0.5);\n" +" float d = inversesqrt(dot(axis,axis));\n" +" vec4 q = vec4(axis.x*sah*d,axis.y*sah*d,axis.z*sah*d,cah);\n" +" return q;\n" +"}\n" +"//\n" +"// vector rotation via quaternion\n" +"//\n" +"vec4 quatRotate3 ( in vec3 p, in vec4 q )\n" +"{\n" +" vec4 temp = quatMul ( q, vec4 ( 
p, 0.0 ) );\n" +" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" +"}\n" +"vec4 quatRotate ( in vec4 p, in vec4 q )\n" +"{\n" +" vec4 temp = quatMul ( q, p );\n" +" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" +"}\n" +"\n" +"out vec3 lightDir,normal,ambient;\n" +"\n" +"void main(void)\n" +"{\n" +" vec4 q = instance_quaternion;\n" +" ambient = vec3(0.3,.3,0.3);\n" +" \n" +" \n" +" vec4 local_normal = (quatRotate3( vertexnormal,q));\n" +" vec3 light_pos = vec3(-0.8,1,-0.6);\n" +" normal = local_normal.xyz;\n"//normalize(ModelViewMatrix * local_normal).xyz;\n" +"\n" +" lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n" +"// lightDir = normalize(vec3(gl_LightSource[0].position));\n" +" \n" +" vec4 axis = vec4(1,1,1,0);\n" +" vec4 localcoord = quatRotate3( position.xyz*instance_scale,q);\n" +" vec4 vertexPos = ProjectionMatrix * ModelViewMatrix *(instance_position+localcoord);\n" +"\n" +" gl_Position = vertexPos;\n" +" \n" +" fragment.color = instance_color;\n" +" vert.texcoord = uvcoords;\n" +"}\n" +; + + +static const char* fragmentShader= \ +"#version 330\n" +"precision highp float;\n" +"\n" +"in Fragment\n" +"{\n" +" vec4 color;\n" +"} fragment;\n" +"\n" +"in Vert\n" +"{\n" +" vec2 texcoord;\n" +"} vert;\n" +"\n" +"uniform sampler2D Diffuse;\n" +"\n" +"in vec3 lightDir,normal,ambient;\n" +"\n" +"out vec4 color;\n" +"\n" +"void main_textured(void)\n" +"{\n" +" color = texture2D(Diffuse,vert.texcoord);//fragment.color;\n" +"}\n" +"\n" +"void main(void)\n" +"{\n" +" vec4 texel = fragment.color*texture2D(Diffuse,vert.texcoord);//fragment.color;\n" +" vec3 ct,cf;\n" +" float intensity,at,af;\n" +" intensity = max(dot(lightDir,normalize(normal)),.2);\n" +" cf = intensity*vec3(1.0,1.0,1.0)+ambient;" +" af = 1.0;\n" +" \n" +" ct = texel.rgb;\n" +" at = texel.a;\n" +" \n" +" color = vec4(ct * cf, at * af); \n" +"}\n" +; + + +// Load the shader from the source text +void gltLoadShaderSrc(const char *szShaderSrc, GLuint shader) +{ 
+ GLchar *fsStringPtr[1]; + + fsStringPtr[0] = (GLchar *)szShaderSrc; + glShaderSource(shader, 1, (const GLchar **)fsStringPtr, NULL); +} + + +GLuint gltLoadShaderPair(const char *szVertexProg, const char *szFragmentProg) +{ + // Temporary Shader objects + GLuint hVertexShader; + GLuint hFragmentShader; + GLuint hReturn = 0; + GLint testVal; + + // Create shader objects + hVertexShader = glCreateShader(GL_VERTEX_SHADER); + hFragmentShader = glCreateShader(GL_FRAGMENT_SHADER); + + gltLoadShaderSrc(vertexShader, hVertexShader); + gltLoadShaderSrc(fragmentShader, hFragmentShader); + + // Compile them + glCompileShader(hVertexShader); + glCompileShader(hFragmentShader); + + // Check for errors + glGetShaderiv(hVertexShader, GL_COMPILE_STATUS, &testVal); + if(testVal == GL_FALSE) + { + char temp[256] = ""; + glGetShaderInfoLog( hVertexShader, 256, NULL, temp); + fprintf( stderr, "Compile failed:\n%s\n", temp); + assert(0); + exit(0); + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + glGetShaderiv(hFragmentShader, GL_COMPILE_STATUS, &testVal); + if(testVal == GL_FALSE) + { + char temp[256] = ""; + glGetShaderInfoLog( hFragmentShader, 256, NULL, temp); + fprintf( stderr, "Compile failed:\n%s\n", temp); + assert(0); + exit(0); + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + // Link them - assuming it works... 
+ hReturn = glCreateProgram(); + glAttachShader(hReturn, hVertexShader); + glAttachShader(hReturn, hFragmentShader); + + glLinkProgram(hReturn); + + // These are no longer needed + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + + // Make sure link worked too + glGetProgramiv(hReturn, GL_LINK_STATUS, &testVal); + if(testVal == GL_FALSE) + { + glDeleteProgram(hReturn); + return (GLuint)NULL; + } + + return hReturn; +} + + +void GLInstancingRenderer::writeTransforms() +{ + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + glFlush(); + + char* orgBase = (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE); + + int totalNumInstances= 0; + + for (int k=0;km_numGraphicsInstances; + } + + + + for (int k=0;km_numGraphicsInstances;i++) + { + + int srcIndex=i+gfxObj->m_instanceOffset; + + positions[srcIndex*4] = m_data->m_instance_positions_ptr[srcIndex*4]; + positions[srcIndex*4+1] = m_data->m_instance_positions_ptr[srcIndex*4+1]; + positions[srcIndex*4+2] = m_data->m_instance_positions_ptr[srcIndex*4+2]; + positions[srcIndex*4+3] = m_data->m_instance_positions_ptr[srcIndex*4+3]; + + orientations[srcIndex*4]=m_data->m_instance_quaternion_ptr[srcIndex*4]; + orientations[srcIndex*4+1]=m_data->m_instance_quaternion_ptr[srcIndex*4+1]; + orientations[srcIndex*4+2]=m_data->m_instance_quaternion_ptr[srcIndex*4+2]; + orientations[srcIndex*4+3]=m_data->m_instance_quaternion_ptr[srcIndex*4+3]; + + colors[srcIndex*4]=m_data->m_instance_colors_ptr[srcIndex*4]; + colors[srcIndex*4+1]=m_data->m_instance_colors_ptr[srcIndex*4+1]; + colors[srcIndex*4+2]=m_data->m_instance_colors_ptr[srcIndex*4+2]; + colors[srcIndex*4+3]=m_data->m_instance_colors_ptr[srcIndex*4+3]; + + scaling[srcIndex*3]=m_data->m_instance_scale_ptr[srcIndex*3]; + scaling[srcIndex*3+1]=m_data->m_instance_scale_ptr[srcIndex*3+1]; + scaling[srcIndex*3+2]=m_data->m_instance_scale_ptr[srcIndex*3+2]; + + } + } + + glUnmapBuffer( GL_ARRAY_BUFFER); + //if this glFinish is removed, the animation is not always 
working/blocks + //@todo: figure out why + glFlush(); +} + +int GLInstancingRenderer::registerGraphicsInstance(int shapeIndex, const float* position, const float* quaternion, const float* color, const float* scaling) +{ + btGraphicsInstance* gfxObj = m_graphicsInstances[shapeIndex]; + + int index = gfxObj->m_numGraphicsInstances + gfxObj->m_instanceOffset; + + + + m_data->m_instance_positions_ptr[index*4]=position[0]; + m_data->m_instance_positions_ptr[index*4+1]=position[1]; + m_data->m_instance_positions_ptr[index*4+2]=position[2]; + m_data->m_instance_positions_ptr[index*4+3]=1; + + m_data->m_instance_quaternion_ptr[index*4]=quaternion[0]; + m_data->m_instance_quaternion_ptr[index*4+1]=quaternion[1]; + m_data->m_instance_quaternion_ptr[index*4+2]=quaternion[2]; + m_data->m_instance_quaternion_ptr[index*4+3]=quaternion[3]; + + m_data->m_instance_colors_ptr[index*4]=color[0]; + m_data->m_instance_colors_ptr[index*4+1]=color[1]; + m_data->m_instance_colors_ptr[index*4+2]=color[2]; + m_data->m_instance_colors_ptr[index*4+3]=color[3]; + + m_data->m_instance_scale_ptr[index*3] = scaling[0]; + m_data->m_instance_scale_ptr[index*3+1] = scaling[1]; + m_data->m_instance_scale_ptr[index*3+2] = scaling[2]; + + gfxObj->m_numGraphicsInstances++; + return gfxObj->m_numGraphicsInstances; +} + + +int GLInstancingRenderer::registerShape(const float* vertices, int numvertices, const int* indices, int numIndices) +{ + btGraphicsInstance* gfxObj = new btGraphicsInstance; + + if (m_graphicsInstances.size()) + { + btGraphicsInstance* prevObj = m_graphicsInstances[m_graphicsInstances.size()-1]; + gfxObj->m_instanceOffset = prevObj->m_instanceOffset + prevObj->m_numGraphicsInstances; + gfxObj->m_vertexArrayOffset = prevObj->m_vertexArrayOffset + prevObj->m_numVertices; + } else + { + gfxObj->m_instanceOffset = 0; + } + + m_graphicsInstances.push_back(gfxObj); + gfxObj->m_numIndices = numIndices; + gfxObj->m_numVertices = numvertices; + + + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + 
char* dest= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_WRITE_ONLY);//GL_WRITE_ONLY + int vertexStrideInBytes = 9*sizeof(float); + int sz = numvertices*vertexStrideInBytes; + memcpy(dest+vertexStrideInBytes*gfxObj->m_vertexArrayOffset,vertices,sz); + glUnmapBuffer( GL_ARRAY_BUFFER); + + glGenBuffers(1, &gfxObj->m_index_vbo); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gfxObj->m_index_vbo); + int indexBufferSizeInBytes = gfxObj->m_numIndices*sizeof(int); + + glBufferData(GL_ELEMENT_ARRAY_BUFFER, indexBufferSizeInBytes, NULL, GL_STATIC_DRAW); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER,0,indexBufferSizeInBytes,indices); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + glGenVertexArrays(1, &gfxObj->m_cube_vao); + glBindVertexArray(gfxObj->m_cube_vao); + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindVertexArray(0); + + + return m_graphicsInstances.size()-1; +} + + + + +void GLInstancingRenderer::InitShaders() +{ + + int POSITION_BUFFER_SIZE = (MAX_CONVEX_BODIES_CL*sizeof(float)*4); + int ORIENTATION_BUFFER_SIZE = (MAX_CONVEX_BODIES_CL*sizeof(float)*4); + int COLOR_BUFFER_SIZE = (MAX_CONVEX_BODIES_CL*sizeof(float)*4); + int SCALE_BUFFER_SIZE = (MAX_CONVEX_BODIES_CL*sizeof(float)*3); + + + instancingShader = gltLoadShaderPair(vertexShader,fragmentShader); + + glLinkProgram(instancingShader); + glUseProgram(instancingShader); + angle_loc = glGetUniformLocation(instancingShader, "angle"); + ModelViewMatrix = glGetUniformLocation(instancingShader, "ModelViewMatrix"); + ProjectionMatrix = glGetUniformLocation(instancingShader, "ProjectionMatrix"); + uniform_texture_diffuse = glGetUniformLocation(instancingShader, "Diffuse"); + + GLuint offset = 0; + + + glGenBuffers(1, &cube_vbo); + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + + + int size = SHAPE_BUFFER_SIZE + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE+SCALE_BUFFER_SIZE; + VBOsize = size; + + glBufferData(GL_ARRAY_BUFFER, size, 0, 
GL_DYNAMIC_DRAW);//GL_STATIC_DRAW); + + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindVertexArray(0); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + +} + + +void myinit() +{ + GLint err = glGetError(); + + // GLfloat light_ambient[] = { btScalar(0.2), btScalar(0.2), btScalar(0.2), btScalar(1.0) }; + GLfloat light_ambient[] = { btScalar(1.0), btScalar(1.2), btScalar(0.2), btScalar(1.0) }; + + GLfloat light_diffuse[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0) }; + GLfloat light_specular[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0 )}; + /* light_position is NOT default value */ + GLfloat light_position0[] = { btScalar(10000.0), btScalar(10000.0), btScalar(10000.0), btScalar(0.0 )}; + GLfloat light_position1[] = { btScalar(-1.0), btScalar(-10.0), btScalar(-1.0), btScalar(0.0) }; + + glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT0, GL_POSITION, light_position0); + + glLightfv(GL_LIGHT1, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT1, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT1, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT1, GL_POSITION, light_position1); + + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + glEnable(GL_LIGHT1); + + + // glShadeModel(GL_FLAT);//GL_SMOOTH); + glShadeModel(GL_SMOOTH); + + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + + glClearColor(float(0.7),float(0.7),float(0.7),float(0)); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + + + static bool m_textureenabled = true; + static bool m_textureinitialized = false; + + + if(m_textureenabled) + { + if(!m_textureinitialized) + { + glActiveTexture(GL_TEXTURE0); + + GLubyte* image=new GLubyte[256*256*3]; + for(int y=0;y<256;++y) + { + const int t=y>>5; + GLubyte* pi=image+y*256*3; + for(int x=0;x<256;++x) + { + if (x<2||y<2||x>253||y>253) + { + pi[0]=0; + pi[1]=0; + pi[2]=0; + } else + { + pi[0]=255; + pi[1]=255; + 
pi[2]=255; + } + + /* + const int s=x>>5; + const GLubyte b=180; + GLubyte c=b+((s+t&1)&1)*(255-b); + pi[0]=c; + pi[1]=c; + pi[2]=c; + */ + + pi+=3; + } + } + + glGenTextures(1,(GLuint*)&m_texturehandle); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + glTexEnvf(GL_TEXTURE_ENV,GL_TEXTURE_ENV_MODE,GL_MODULATE); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT); + gluBuild2DMipmaps(GL_TEXTURE_2D,3,256,256,GL_RGB,GL_UNSIGNED_BYTE,image); + delete[] image; + m_textureinitialized=true; + } + // glMatrixMode(GL_TEXTURE); + // glLoadIdentity(); + // glMatrixMode(GL_MODELVIEW); + + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + + } else + { + glDisable(GL_TEXTURE_2D); + } + + glEnable(GL_COLOR_MATERIAL); + + err = glGetError(); + assert(err==GL_NO_ERROR); + + // glEnable(GL_CULL_FACE); + // glCullFace(GL_BACK); +} + +void updateCamera() +{ + + + + btVector3 m_cameraUp(0,1,0); + int m_forwardAxis=2; + + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + + + //m_azi+=0.0f; + + btScalar rele = m_ele * btScalar(0.01745329251994329547);// rads per deg + btScalar razi = m_azi * btScalar(0.01745329251994329547);// rads per deg + + + btQuaternion rot(m_cameraUp,razi); + + + btVector3 eyePos(0,0,0); + eyePos[m_forwardAxis] = -m_cameraDistance; + + btVector3 forward(eyePos[0],eyePos[1],eyePos[2]); + if (forward.length2() < SIMD_EPSILON) + { + forward.setValue(1.f,0.f,0.f); + } + btVector3 right = m_cameraUp.cross(forward); + btQuaternion roll(right,-rele); + + eyePos = btMatrix3x3(rot) * btMatrix3x3(roll) * eyePos; + + m_cameraPosition[0] = eyePos.getX(); + m_cameraPosition[1] = eyePos.getY(); + m_cameraPosition[2] = eyePos.getZ(); + m_cameraPosition += m_cameraTargetPosition; + + + float m_frustumZNear=1; + float m_frustumZFar=1000; + 
+ if (m_glutScreenWidth == 0 && m_glutScreenHeight == 0) + return; + + float aspect; + btVector3 extents; + + if (m_glutScreenWidth > m_glutScreenHeight) + { + aspect = m_glutScreenWidth / (float)m_glutScreenHeight; + extents.setValue(aspect * 1.0f, 1.0f,0); + } else + { + aspect = m_glutScreenHeight / (float)m_glutScreenWidth; + extents.setValue(1.0f, aspect*1.f,0); + } + + + if (m_ortho) + { + // reset matrix + glLoadIdentity(); + extents *= m_cameraDistance; + btVector3 lower = m_cameraTargetPosition - extents; + btVector3 upper = m_cameraTargetPosition + extents; + glOrtho(lower.getX(), upper.getX(), lower.getY(), upper.getY(),-1000,1000); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + } else + { + if (m_glutScreenWidth > m_glutScreenHeight) + { + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + } else + { + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + } + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + gluLookAt(m_cameraPosition[0], m_cameraPosition[1], m_cameraPosition[2], + m_cameraTargetPosition[0], m_cameraTargetPosition[1], m_cameraTargetPosition[2], + m_cameraUp.getX(),m_cameraUp.getY(),m_cameraUp.getZ()); + } + +} + + +void GLInstancingRenderer::RenderScene(void) +{ + BT_PROFILE("GlutDisplayFunc"); + + myinit(); + + updateCamera(); + + //render coordinate system + glBegin(GL_LINES); + glColor3f(1,0,0); + glVertex3f(0,0,0); + glVertex3f(1,0,0); + glColor3f(0,1,0); + glVertex3f(0,0,0); + glVertex3f(0,1,0); + glColor3f(0,0,1); + glVertex3f(0,0,0); + glVertex3f(0,0,1); + glEnd(); + + //do a finish, to make sure timings are clean + // glFinish(); + + + + // glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + glFlush(); + + //updatePos(); + +// simulationLoop(); + + //useCPU = true; + + int totalNumInstances = 0; + + for (int i=0;im_numGraphicsInstances; + } + + 
int curOffset = 0; + + for (int i=0;im_instanceOffset*4*sizeof(float); + + int POSITION_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4); + int ORIENTATION_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4); + int COLOR_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4); + int SCALE_BUFFER_SIZE = (totalNumInstances*sizeof(float)*3); + + glBindVertexArray(gfxObj->m_cube_vao); + + + int vertexStride = 9*sizeof(float); + int vertexBase = gfxObj->m_vertexArrayOffset*vertexStride; + + glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid*)vertexBase); + glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(curOffset*4*sizeof(float)+SHAPE_BUFFER_SIZE)); + glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(curOffset*4*sizeof(float)+SHAPE_BUFFER_SIZE+POSITION_BUFFER_SIZE)); + int uvoffset = 7*sizeof(float)+vertexBase; + int normaloffset = 4*sizeof(float)+vertexBase; + + glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)uvoffset); + glVertexAttribPointer(4, 3, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)normaloffset); + glVertexAttribPointer(5, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(curOffset*4*sizeof(float)+SHAPE_BUFFER_SIZE+POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE)); + glVertexAttribPointer(6, 3, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(curOffset*3*sizeof(float)+SHAPE_BUFFER_SIZE+POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE)); + + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glEnableVertexAttribArray(4); + glEnableVertexAttribArray(5); + glEnableVertexAttribArray(6); + + glVertexAttribDivisor(0, 0); + glVertexAttribDivisor(1, 1); + glVertexAttribDivisor(2, 1); + glVertexAttribDivisor(3, 0); + glVertexAttribDivisor(4, 0); + glVertexAttribDivisor(5, 1); + glVertexAttribDivisor(6, 1); + + glUseProgram(instancingShader); + glUniform1f(angle_loc, 0); + GLfloat pm[16]; + glGetFloatv(GL_PROJECTION_MATRIX, pm); + 
glUniformMatrix4fv(ProjectionMatrix, 1, false, &pm[0]); + + GLfloat mvm[16]; + glGetFloatv(GL_MODELVIEW_MATRIX, mvm); + glUniformMatrix4fv(ModelViewMatrix, 1, false, &mvm[0]); + + glUniform1i(uniform_texture_diffuse, 0); + + glFlush(); + + if (gfxObj->m_numGraphicsInstances) + { + int indexCount = gfxObj->m_numIndices; + int indexOffset = 0; + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gfxObj->m_index_vbo); + { + BT_PROFILE("glDrawElementsInstanced"); + glDrawElementsInstanced(GL_TRIANGLES, indexCount, GL_UNSIGNED_INT, (void*)indexOffset, gfxObj->m_numGraphicsInstances); + } + } + curOffset+= gfxObj->m_numGraphicsInstances; + } + glUseProgram(0); + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindVertexArray(0); + + + GLint err = glGetError(); + assert(err==GL_NO_ERROR); +} + + +void GLInstancingRenderer::CleanupShaders() +{ + + delete []m_data->m_instance_positions_ptr; + delete []m_data->m_instance_quaternion_ptr; + delete []m_data->m_instance_colors_ptr; + delete []m_data->m_instance_scale_ptr; +} \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GLInstancingRenderer.h b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GLInstancingRenderer.h new file mode 100644 index 000000000..b5924385b --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GLInstancingRenderer.h @@ -0,0 +1,45 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#ifndef GL_INSTANCING_RENDERER_H +#define GL_INSTANCING_RENDERER_H + +#include "LinearMath/btAlignedObjectArray.h" + +class GLInstancingRenderer +{ + + btAlignedObjectArray m_graphicsInstances; + + struct InternalDataRenderer* m_data; + +public: + GLInstancingRenderer(); + virtual ~GLInstancingRenderer(); + + void InitShaders(); + void RenderScene(void); + void CleanupShaders(); + + ///vertices must be in the format x,y,z, nx,ny,nz, u,v + int registerShape(const float* vertices, int numvertices, const int* indices, int numIndices); + + ///position x,y,z, quaternion x,y,z,w, color r,g,b,a, scaling x,y,z + int registerGraphicsInstance(int shapeIndex, const float* position, const float* quaternion, const float* color, const float* scaling); + + void writeTransforms(); +}; + +#endif //GL_INSTANCING_RENDERER_H diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GlutRenderer.cpp b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GlutRenderer.cpp new file mode 100644 index 000000000..3cb02c278 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GlutRenderer.cpp @@ -0,0 +1,107 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + + +#include +#include "GlutRenderer.h" +#include + + +GlutRenderer* GlutRenderer::gDemoApplication; + + + +void GlutRenderer::runMainLoop() +{ + glutMainLoop(); + +} + + +static void glutKeyboardCallback(unsigned char key, int x, int y) { GlutRenderer::gDemoApplication->keyboardCallback(key,x,y); } +static void glutKeyboardUpCallback(unsigned char key, int x, int y){ GlutRenderer::gDemoApplication->keyboardUpCallback(key,x,y);} +static void glutSpecialKeyboardCallback(int key, int x, int y){ GlutRenderer::gDemoApplication->specialKeyboard(key,x,y);} +static void glutSpecialKeyboardUpCallback(int key, int x, int y){ GlutRenderer::gDemoApplication->specialKeyboardUp(key,x,y);} +static void glutReshapeCallback(int w, int h){ GlutRenderer::gDemoApplication->resize(w,h);} +static void glutIdleCallback(){ glutPostRedisplay (); } +static void glutMouseFuncCallback(int button, int state, int x, int y){ GlutRenderer::gDemoApplication->mouseFunc(button,state,x,y);} +static void glutMotionFuncCallback(int x,int y){ GlutRenderer::gDemoApplication->mouseMotionFunc(x,y);} +static void glutDisplayCallback(void){ GlutRenderer::gDemoApplication->displayCallback();} + + +void GlutRenderer::resize(int width, int height) +{ + m_glutScreenWidth = width; + m_glutScreenHeight = height; +} + +void GlutRenderer::mouseFunc(int button, int state, int x, int y) +{ +} +void GlutRenderer::mouseMotionFunc(int x,int y) +{ +} + +void GlutRenderer::renderScene() +{ + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + 
glutSwapBuffers(); + glutPostRedisplay(); + + GLint err = glGetError(); + assert(err==GL_NO_ERROR); +} + +void GlutRenderer::displayCallback() +{ + updateScene(); + + renderScene(); +} + +GlutRenderer::GlutRenderer(int argc, char* argv[]) +{ + glutInit(&argc, argv); + gDemoApplication = this; +} + +void GlutRenderer::initGraphics(int width, int height) +{ + m_glutScreenWidth = width; + m_glutScreenHeight = height; + + glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); + + glutInitWindowSize(m_glutScreenWidth, m_glutScreenHeight); + glutCreateWindow("GPU rigid body pipeline2"); + glutKeyboardFunc(glutKeyboardCallback); + glutKeyboardUpFunc(glutKeyboardUpCallback); + glutSpecialFunc(glutSpecialKeyboardCallback); + glutSpecialUpFunc(glutSpecialKeyboardUpCallback); + glutReshapeFunc(glutReshapeCallback); + glutIdleFunc(glutIdleCallback); + glutMouseFunc(glutMouseFuncCallback); + glutPassiveMotionFunc(glutMotionFuncCallback); + glutMotionFunc(glutMotionFuncCallback); + glutDisplayFunc( glutDisplayCallback ); + + GLenum err = glewInit(); + if (GLEW_OK != err) + { + printf("Error: %s\n", glewGetErrorString(err)); + } + + glClearColor(0.6f,0.6f,1.f,1.f); +} \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GlutRenderer.h b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GlutRenderer.h new file mode 100644 index 000000000..3596ad490 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/GlutRenderer.h @@ -0,0 +1,59 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +#ifndef GLUT_RENDERER_H +#define GLUT_RENDERER_H + +#include "btGlutInclude.h" +#include "LinearMath/btVector3.h" + +struct GlutRenderer +{ + static GlutRenderer* gDemoApplication; + int m_glutScreenWidth; + int m_glutScreenHeight; + + btVector3 m_cameraPosition; + btVector3 m_cameraTargetPosition; + btScalar m_cameraDistance; + btVector3 m_cameraUp; + float m_azimuth; + float m_elevation; + + + GlutRenderer(int argc, char* argv[]); + + virtual void initGraphics(int width, int height); + virtual void cleanup() {} + + void runMainLoop(); + + virtual void updateScene(){}; + + virtual void renderScene(); + + virtual void keyboardCallback(unsigned char key, int x, int y) {}; + virtual void keyboardUpCallback(unsigned char key, int x, int y) {} + virtual void specialKeyboard(int key, int x, int y){} + virtual void specialKeyboardUp(int key, int x, int y){} + virtual void resize(int w, int h); + virtual void mouseFunc(int button, int state, int x, int y); + virtual void mouseMotionFunc(int x,int y); + virtual void displayCallback(); + + +}; + +#endif //GLUT_RENDERER_H diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/NVIDIA/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/NVIDIA/premake4.lua new file mode 100644 index 000000000..8ccc57e5d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/NVIDIA/premake4.lua @@ -0,0 +1,64 @@ + + hasCL = findOpenCL_NVIDIA() + + if (hasCL) then + + project 
"OpenCL_gpu_rigidbody_pipeline2_NVIDIA" + + initOpenCL_NVIDIA() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + + initOpenGL() + initGlew() + + + includedirs { + "../../primitives", + "../../../bullet2" + } + + files { + "../main.cpp", + "../CLPhysicsDemo.cpp", + "../CLPhysicsDemo.h", + "../GLInstancingRenderer.cpp", + "../GLInstancingRenderer.h", + "../GlutRenderer.cpp", + "../GlutRenderer.h", + "../Win32OpenGLRenderManager.cpp", + "../Win32OpenGLRenderManager.h", + "../../gpu_rigidbody_pipeline/btConvexUtility.cpp", + "../../gpu_rigidbody_pipeline/btConvexUtility.h", + "../../gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.cpp", + "../../gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h", + "../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp", + "../../../dynamics/basic_demo/ConvexHeightFieldShape.h", + "../../../bullet2/LinearMath/btConvexHullComputer.cpp", + "../../../bullet2/LinearMath/btConvexHullComputer.h", + "../../broadphase_benchmark/findPairsOpenCL.cpp", + "../../broadphase_benchmark/findPairsOpenCL.h", + "../../broadphase_benchmark/btGridBroadphaseCL.cpp", + "../../broadphase_benchmark/btGridBroadphaseCL.h", + "../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp", + "../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h", + "../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp", + "../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h", + "../../../bullet2/LinearMath/btAlignedAllocator.cpp", + "../../../bullet2/LinearMath/btQuickprof.cpp", + "../../../bullet2/LinearMath/btQuickprof.h", + "../../../bullet2/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp", + "../../../bullet2/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp", + "../../../bullet2/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp", + "../../basic_initialize/btOpenCLUtils.cpp", + "../../basic_initialize/btOpenCLUtils.h", + "../../opengl_interop/btOpenCLGLInteropBuffer.cpp", + 
"../../opengl_interop/btOpenCLGLInteropBuffer.h", + "../../opengl_interop/btStopwatch.cpp", + "../../opengl_interop/btStopwatch.h" + } + + end diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/OpenGLInclude.h b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/OpenGLInclude.h new file mode 100644 index 000000000..2b3060bd3 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/OpenGLInclude.h @@ -0,0 +1,41 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Erwin Coumans + + +#ifndef __OPENGL_INCLUDE_H +#define __OPENGL_INCLUDE_H + +#include + +//think different +#if defined(__APPLE__) && !defined (VMDMESA) +#include +#include +#include +#else + + +#ifdef _WINDOWS +#include +#include +#include +#else +#include + +#endif //_WINDOWS +#endif //APPLE + +#endif //__OPENGL_INCLUDE_H + diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/ShapeData.h b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/ShapeData.h new file mode 100644 index 000000000..e77affe61 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/ShapeData.h @@ -0,0 +1,210 @@ +#ifndef SHAPE_DATA_H +#define SHAPE_DATA_H + +static float barrel_vertices[] = { +0.0f,-0.5f,0.0f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +0.282362f,-0.5f,-0.205148f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +0.349018f,-0.5f,0.0f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +0.107853f,-0.5f,-0.331936f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +-0.107853f,-0.5f,-0.331936f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +0.107853f,-0.5f,-0.331936f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +-0.282362f,-0.5f,-0.205148f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +-0.349018f,-0.5f,0.0f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +-0.282362f,-0.5f,0.205148f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +-0.107853f,-0.5f,0.331936f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +0.107853f,-0.5f,0.331936f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +0.282362f,-0.5f,0.205148f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f, +0.0f,0.5f,0.0f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +0.349018f,0.5f,0.0f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +0.282362f,0.5f,-0.205148f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +0.107853f,0.5f,-0.331936f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +0.107853f,0.5f,-0.331936f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +-0.107853f,0.5f,-0.331936f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +-0.282362f,0.5f,-0.205148f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +-0.349018f,0.5f,0.0f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, 
+-0.282362f,0.5f,0.205148f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +-0.107853f,0.5f,0.331936f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +0.107853f,0.5f,0.331936f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +0.282362f,0.5f,0.205148f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f, +0.349018f,-0.5f,0.0f, 1.0f, 0.957307f,-0.289072f,0.0f, 0.5f, 0.5f, +0.404509f,0.0f,-0.293893f, 1.0f, 0.809017f,0.0f,-0.587785f, 0.5f, 0.5f, +0.5f,0.0f,0.0f, 1.0f, 1.0f,0.0f,0.0f, 0.5f, 0.5f, +0.282362f,-0.5f,-0.205148f, 1.0f, 0.774478f,-0.289072f,-0.562691f, 0.5f, 0.5f, +0.154508f,0.0f,-0.475528f, 1.0f, 0.309017f,0.0f,-0.951057f, 0.5f, 0.5f, +0.107853f,-0.5f,-0.331936f, 1.0f, 0.295824f,-0.289072f,-0.910453f, 0.5f, 0.5f, +0.107853f,-0.5f,-0.331936f, 1.0f, 0.295824f,-0.289072f,-0.910453f, 0.5f, 0.5f, +-0.154509f,0.0f,-0.475528f, 1.0f, -0.309017f,0.0f,-0.951057f, 0.5f, 0.5f, +0.154508f,0.0f,-0.475528f, 1.0f, 0.309017f,0.0f,-0.951057f, 0.5f, 0.5f, +-0.107853f,-0.5f,-0.331936f, 1.0f, -0.295824f,-0.289072f,-0.910453f, 0.5f, 0.5f, +-0.404509f,0.0f,-0.293893f, 1.0f, -0.809017f,0.0f,-0.587785f, 0.5f, 0.5f, +-0.282362f,-0.5f,-0.205148f, 1.0f, -0.774478f,-0.289072f,-0.562691f, 0.5f, 0.5f, +-0.5f,0.0f,0.0f, 1.0f, -1.0f,0.0f,0.0f, 0.5f, 0.5f, +-0.349018f,-0.5f,0.0f, 1.0f, -0.957307f,-0.289072f,0.0f, 0.5f, 0.5f, +-0.404508f,0.0f,0.293893f, 1.0f, -0.809017f,0.0f,0.587785f, 0.5f, 0.5f, +-0.282362f,-0.5f,0.205148f, 1.0f, -0.774478f,-0.289072f,0.562691f, 0.5f, 0.5f, +-0.154509f,0.0f,0.475528f, 1.0f, -0.309017f,0.0f,0.951056f, 0.5f, 0.5f, +-0.107853f,-0.5f,0.331936f, 1.0f, -0.295824f,-0.289072f,0.910453f, 0.5f, 0.5f, +0.154509f,0.0f,0.475528f, 1.0f, 0.309017f,0.0f,0.951056f, 0.5f, 0.5f, +0.107853f,-0.5f,0.331936f, 1.0f, 0.295824f,-0.289072f,0.910453f, 0.5f, 0.5f, +0.404509f,0.0f,0.293892f, 1.0f, 0.809017f,0.0f,0.587785f, 0.5f, 0.5f, +0.282362f,-0.5f,0.205148f, 1.0f, 0.774478f,-0.289072f,0.562691f, 0.5f, 0.5f, +0.282362f,0.5f,-0.205148f, 1.0f, 0.774478f,0.289072f,-0.562691f, 0.5f, 0.5f, +0.349018f,0.5f,0.0f, 1.0f, 0.957307f,0.289072f,0.0f, 
0.5f, 0.5f, +0.107853f,0.5f,-0.331936f, 1.0f, 0.295824f,0.289072f,-0.910453f, 0.5f, 0.5f, +-0.107853f,0.5f,-0.331936f, 1.0f, -0.295824f,0.289072f,-0.910453f, 0.5f, 0.5f, +0.107853f,0.5f,-0.331936f, 1.0f, 0.295824f,0.289072f,-0.910453f, 0.5f, 0.5f, +-0.282362f,0.5f,-0.205148f, 1.0f, -0.774478f,0.289072f,-0.562691f, 0.5f, 0.5f, +-0.349018f,0.5f,0.0f, 1.0f, -0.957307f,0.289072f,0.0f, 0.5f, 0.5f, +-0.282362f,0.5f,0.205148f, 1.0f, -0.774478f,0.289072f,0.562691f, 0.5f, 0.5f, +-0.107853f,0.5f,0.331936f, 1.0f, -0.295824f,0.289072f,0.910453f, 0.5f, 0.5f, +0.107853f,0.5f,0.331936f, 1.0f, 0.295824f,0.289072f,0.910453f, 0.5f, 0.5f, +0.282362f,0.5f,0.205148f, 1.0f, 0.774478f,0.289072f,0.562691f, 0.5f, 0.5f +}; + + + +static int barrel_indices[] = { +0,1,2, +0,3,1, +0,4,5, +0,6,4, +0,7,6, +0,8,7, +0,9,8, +0,10,9, +0,11,10, +0,2,11, +12,13,14, +12,14,15, +12,16,17, +12,17,18, +12,18,19, +12,19,20, +12,20,21, +12,21,22, +12,22,23, +12,23,13, +24,25,26, +24,27,25, +27,28,25, +27,29,28, +30,31,32, +30,33,31, +33,34,31, +33,35,34, +35,36,34, +35,37,36, +37,38,36, +37,39,38, +39,40,38, +39,41,40, +41,42,40, +41,43,42, +43,44,42, +43,45,44, +45,26,44, +45,24,26, +26,46,47, +26,25,46, +25,48,46, +25,28,48, +32,49,50, +32,31,49, +31,51,49, +31,34,51, +34,52,51, +34,36,52, +36,53,52, +36,38,53, +38,54,53, +38,40,54, +40,55,54, +40,42,55, +42,56,55, +42,44,56, +44,47,56, +44,26,47, +}; + + +///position xyz, unused w, normal, uv +static const float cube_vertices[] = +{ + -0.5f, -0.5f, 0.5f, 0.0f, 0,0,1, 0,0,//0 + 0.5f, -0.5f, 0.5f, 0.0f, 0,0,1, 1,0,//1 + 0.5f, 0.5f, 0.5f, 0.0f, 0,0,1, 1,1,//2 + -0.5f, 0.5f, 0.5f, 0.0f, 0,0,1, 0,1 ,//3 + + -0.5f, -0.5f, -0.5f, 0.5f, 0,0,-1, 0,0,//4 + 0.5f, -0.5f, -0.5f, 0.5f, 0,0,-1, 1,0,//5 + 0.5f, 0.5f, -0.5f, 0.5f, 0,0,-1, 1,1,//6 + -0.5f, 0.5f, -0.5f, 0.5f, 0,0,-1, 0,1,//7 + + -0.5f, -0.5f, -0.5f, 0.5f, -1,0,0, 0,0, + -0.5f, 0.5f, -0.5f, 0.5f, -1,0,0, 1,0, + -0.5f, 0.5f, 0.5f, 0.5f, -1,0,0, 1,1, + -0.5f, -0.5f, 0.5f, 0.5f, -1,0,0, 0,1, + + 0.5f, -0.5f, 
-0.5f, 0.5f, 1,0,0, 0,0, + 0.5f, 0.5f, -0.5f, 0.5f, 1,0,0, 1,0, + 0.5f, 0.5f, 0.5f, 0.5f, 1,0,0, 1,1, + 0.5f, -0.5f, 0.5f, 0.5f, 1,0,0, 0,1, + + -0.5f, -0.5f, -0.5f, 0.5f, 0,-1,0, 0,0, + -0.5f, -0.5f, 0.5f, 0.5f, 0,-1,0, 1,0, + 0.5f, -0.5f, 0.5f, 0.5f, 0,-1,0, 1,1, + 0.5f,-0.5f, -0.5f, 0.5f, 0,-1,0, 0,1, + + -0.5f, 0.5f, -0.5f, 0.5f, 0,1,0, 0,0, + -0.5f, 0.5f, 0.5f, 0.5f, 0,1,0, 1,0, + 0.5f, 0.5f, 0.5f, 0.5f, 0,1,0, 1,1, + 0.5f,0.5f, -0.5f, 0.5f, 0,1,0, 0,1, +}; + + +///position xyz, unused w, normal, uv +static const float cube_vertices2[] = +{ + -1.5f, -0.5f, 0.5f, 0.0f, 0,0,1, 0,0,//0 + 1.5f, -0.5f, 0.5f, 0.0f, 0,0,1, 1,0,//1 + 1.5f, 0.5f, 0.5f, 0.0f, 0,0,1, 1,1,//2 + -1.5f, 0.5f, 0.5f, 0.0f, 0,0,1, 0,1 ,//3 + + -1.5f, -0.5f, -0.5f, 0.5f, 0,0,-1, 0,0,//4 + 1.5f, -0.5f, -0.5f, 0.5f, 0,0,-1, 1,0,//5 + 1.5f, 0.5f, -0.5f, 0.5f, 0,0,-1, 1,1,//6 + -1.5f, 0.5f, -0.5f, 0.5f, 0,0,-1, 0,1,//7 + + -1.5f, -0.5f, -0.5f, 0.5f, -1,0,0, 0,0, + -1.5f, 0.5f, -0.5f, 0.5f, -1,0,0, 1,0, + -1.5f, 0.5f, 0.5f, 0.5f, -1,0,0, 1,1, + -1.5f, -0.5f, 0.5f, 0.5f, -1,0,0, 0,1, + + 1.5f, -0.5f, -0.5f, 0.5f, 1,0,0, 0,0, + 1.5f, 0.5f, -0.5f, 0.5f, 1,0,0, 1,0, + 1.5f, 0.5f, 0.5f, 0.5f, 1,0,0, 1,1, + 1.5f, -0.5f, 0.5f, 0.5f, 1,0,0, 0,1, + + -1.5f, -0.5f, -0.5f, 0.5f, 0,-1,0, 0,0, + -1.5f, -0.5f, 0.5f, 0.5f, 0,-1,0, 1,0, + 1.5f, -0.5f, 0.5f, 0.5f, 0,-1,0, 1,1, + 1.5f, -0.5f, -0.5f, 0.5f, 0,-1,0, 0,1, + + -1.5f, 0.5f, -0.5f, 0.5f, 0,1,0, 0,0, + -1.5f, 0.5f, 0.5f, 0.5f, 0,1,0, 1,0, + 1.5f, 0.5f, 0.5f, 0.5f, 0,1,0, 1,1, + 1.5f, 0.5f, -0.5f, 0.5f, 0,1,0, 0,1, +}; + + +static const int cube_indices[]= +{ + 0,1,2,0,2,3,//ground face + 4,5,6,4,6,7,//top face + 8,9,10,8,10,11, + 12,13,14,12,14,15, + 16,17,18,16,18,19, + 20,21,22,20,22,23 +}; + +#endif //SHAPE_DATA_H diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/Win32OpenGLRenderManager.cpp b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/Win32OpenGLRenderManager.cpp new file mode 100644 index 000000000..c3b9f250a --- 
/dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/Win32OpenGLRenderManager.cpp @@ -0,0 +1,465 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + + +#include "Win32OpenGLRenderManager.h" + +#include +#include + +static InternalData2* sData = 0; + +struct InternalData2 +{ + HWND m_hWnd;; + int m_width; + int m_height; + HDC m_hDC; + HGLRC m_hRC; + bool m_OpenGLInitialized; + int m_oldScreenWidth; + int m_oldHeight; + int m_oldBitsPerPel; + bool m_quit; + + + InternalData2() + { + m_hWnd = 0; + m_width = 0; + m_height = 0; + m_hDC = 0; + m_hRC = 0; + m_OpenGLInitialized = false; + m_oldScreenWidth = 0; + m_oldHeight = 0; + m_oldBitsPerPel = 0; + m_quit = false; + } +}; + + +void Win32OpenGLWindow::enableOpenGL() +{ + + + + PIXELFORMATDESCRIPTOR pfd; + int format; + + // get the device context (DC) + m_data->m_hDC = GetDC( m_data->m_hWnd ); + + // set the pixel format for the DC + ZeroMemory( &pfd, sizeof( pfd ) ); + pfd.nSize = sizeof( pfd ); + pfd.nVersion = 1; + pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; + pfd.iPixelType = PFD_TYPE_RGBA; + pfd.cColorBits = 24; + 
pfd.cDepthBits = 16; + pfd.cStencilBits = 1; + pfd.iLayerType = PFD_MAIN_PLANE; + format = ChoosePixelFormat( m_data->m_hDC, &pfd ); + SetPixelFormat( m_data->m_hDC, format, &pfd ); + + // create and enable the render context (RC) + m_data->m_hRC = wglCreateContext( m_data->m_hDC ); + wglMakeCurrent( m_data->m_hDC, m_data->m_hRC ); + m_data->m_OpenGLInitialized = true; + + +} + + +void Win32OpenGLWindow::disableOpenGL() +{ + m_data->m_OpenGLInitialized = false; + + wglMakeCurrent( NULL, NULL ); + wglDeleteContext( m_data->m_hRC ); + ReleaseDC( m_data->m_hWnd, m_data->m_hDC ); +} + +void Win32OpenGLWindow::pumpMessage() +{ + MSG msg; + // check for messages + if ( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) + { + + // handle or dispatch messages + if ( msg.message == WM_QUIT ) + { + m_data->m_quit = TRUE; + } + else + { + TranslateMessage( &msg ); + DispatchMessage( &msg ); + } + +// gDemoApplication->displayCallback(); + + + }; +} + + + +LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) +{ + switch (message) + { + case WM_PAINT: + { + PAINTSTRUCT ps; + BeginPaint(hWnd, &ps); + EndPaint(hWnd, &ps); + } + return 0; + + case WM_ERASEBKGND: + return 0; + + case WM_DESTROY: + PostQuitMessage(0); + return 0; + + case WM_KEYDOWN: + { + switch ( wParam ) + { + case 'Q': + case VK_ESCAPE: + { + PostQuitMessage(0); + } + return 0; + } + break; + } + + case WM_SIZE: // Size Action Has Taken Place + + switch (wParam) // Evaluate Size Action + { + case SIZE_MINIMIZED: // Was Window Minimized? + return 0; // Return + + case SIZE_MAXIMIZED: // Was Window Maximized? + + sData->m_width = LOWORD (lParam); + sData->m_height = HIWORD (lParam); + //if (sOpenGLInitialized) + //{ + // //gDemoApplication->reshape(sWidth,sHeight); + //} + glViewport(0, 0, sData->m_width, sData->m_height); + return 0; // Return + + case SIZE_RESTORED: // Was Window Restored? 
+			sData->m_width = LOWORD (lParam);
+			sData->m_height = HIWORD (lParam);
+			//if (sOpenGLInitialized)
+			//{
+			//	gDemoApplication->reshape(sWidth,sHeight);
+			//}
+			glViewport(0, 0, sData->m_width, sData->m_height);
+			return 0; // Return
+		}
+		break;
+
+	default:{
+
+			}
+	};
+
+	return DefWindowProc(hWnd, message, wParam, lParam);
+}
+
+// Create a Win32 window (or attach to the existing HWND passed in windowHandle), optionally
+// switch the display mode to fullscreen, then create the OpenGL context and set the window title.
+// width/height are the requested client size; colorBitsPerPixel==0 keeps the current desktop depth.
+void Win32OpenGLWindow::init(int width,int height, bool fullscreen,int colorBitsPerPixel, void* windowHandle)
+{
+	// get handle to exe file
+	HINSTANCE hInstance = GetModuleHandle(0);
+
+	// create the window if we need to and we do not use the null device
+	if (!windowHandle)
+	{
+		const char* ClassName = "DeviceWin32";
+
+		// Register Class
+		WNDCLASSEX wcex;
+		wcex.cbSize = sizeof(WNDCLASSEX);
+		wcex.style = CS_HREDRAW | CS_VREDRAW;
+		wcex.lpfnWndProc = WndProc;
+		wcex.cbClsExtra = 0;
+		wcex.cbWndExtra = 0;
+		wcex.hInstance = hInstance;
+		wcex.hIcon = LoadIcon( NULL, IDI_APPLICATION ); //(HICON)LoadImage(hInstance, "bullet_ico.ico", IMAGE_ICON, 0,0, LR_LOADTRANSPARENT);//LR_LOADFROMFILE);
+		wcex.hCursor = LoadCursor(NULL, IDC_ARROW);
+		wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
+		wcex.lpszMenuName = 0;
+		wcex.lpszClassName = ClassName;
+		wcex.hIconSm = 0;
+
+		// if there is an icon, load it
+		wcex.hIcon = (HICON)LoadImage(hInstance, "irrlicht.ico", IMAGE_ICON, 0,0, LR_LOADFROMFILE);
+
+		RegisterClassEx(&wcex);
+
+		// calculate client size
+
+		RECT clientSize;
+		clientSize.top = 0;
+		clientSize.left = 0;
+		clientSize.right = width;
+		clientSize.bottom = height;
+
+		DWORD style = WS_POPUP;
+
+		if (!fullscreen)
+			style = WS_SYSMENU | WS_BORDER | WS_CAPTION | WS_CLIPCHILDREN | WS_CLIPSIBLINGS | WS_MINIMIZEBOX | WS_MAXIMIZEBOX | WS_SIZEBOX;
+
+		AdjustWindowRect(&clientSize, style, FALSE);
+
+		m_data->m_width = clientSize.right - clientSize.left;
+		m_data->m_height = clientSize.bottom - clientSize.top;
+
+		int windowLeft = (GetSystemMetrics(SM_CXSCREEN) - m_data->m_width) / 2;
+		int windowTop = (GetSystemMetrics(SM_CYSCREEN) - m_data->m_height) / 2;
+
+		if (fullscreen)
+		{
+			windowLeft = 0;
+			windowTop = 0;
+		}
+
+		// create window
+
+		m_data->m_hWnd = CreateWindow( ClassName, "", style, windowLeft, windowTop,
+			m_data->m_width, m_data->m_height, NULL, NULL, hInstance, NULL);
+
+		ShowWindow(m_data->m_hWnd, SW_SHOW);
+		UpdateWindow(m_data->m_hWnd);
+
+		MoveWindow(m_data->m_hWnd, windowLeft, windowTop, m_data->m_width, m_data->m_height, TRUE);
+	}
+	else if (windowHandle)
+	{
+		// attach external window
+		m_data->m_hWnd = static_cast<HWND>(windowHandle); // caller passes the HWND as an opaque void*
+		RECT r;
+		GetWindowRect(m_data->m_hWnd, &r);
+		m_data->m_width = r.right - r.left;
+		m_data->m_height = r.bottom - r.top;
+		//sFullScreen = false;
+		//sExternalWindow = true;
+	}
+
+
+	if (fullscreen)
+	{
+		DEVMODE dm;
+		memset(&dm, 0, sizeof(dm));
+		dm.dmSize = sizeof(dm);
+		// use default values from current setting
+		EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dm);
+		m_data->m_oldScreenWidth = dm.dmPelsWidth;
+		m_data->m_oldHeight = dm.dmPelsHeight;
+		m_data->m_oldBitsPerPel = dm.dmBitsPerPel;
+
+		dm.dmPelsWidth = width;
+		dm.dmPelsHeight = height;
+		if (colorBitsPerPixel)
+		{
+			dm.dmBitsPerPel = colorBitsPerPixel;
+		}
+		dm.dmFields = DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT | DM_DISPLAYFREQUENCY;
+
+		LONG res = ChangeDisplaySettings(&dm, CDS_FULLSCREEN);
+		if (res != DISP_CHANGE_SUCCESSFUL)
+		{ // try again without forcing display frequency
+			dm.dmFields = DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT;
+			res = ChangeDisplaySettings(&dm, CDS_FULLSCREEN);
+		}
+
+	}
+
+	//VideoDriver = video::createOpenGLDriver(CreationParams, FileSystem, this);
+	enableOpenGL();
+
+
+	const wchar_t* text= L"OpenCL rigid body demo";
+
+	DWORD dwResult;
+
+#ifdef _WIN64
+	SetWindowTextW(m_data->m_hWnd, text);
+#else
+	SendMessageTimeoutW(m_data->m_hWnd, WM_SETTEXT, 0,
+		reinterpret_cast<LPARAM>(text), // WM_SETTEXT carries the string pointer in lParam
+		SMTO_ABORTIFHUNG, 2000, &dwResult);
+#endif
+
+
+}
+
+
+void Win32OpenGLWindow::switchFullScreen(bool fullscreen,int width,int height,int colorBitsPerPixel)
+{ + LONG res; + DEVMODE dm; + memset(&dm, 0, sizeof(dm)); + dm.dmSize = sizeof(dm); + // use default values from current setting + EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dm); + + dm.dmFields = DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT | DM_DISPLAYFREQUENCY; + + if (fullscreen && !m_data->m_oldScreenWidth) + { + m_data->m_oldScreenWidth = dm.dmPelsWidth; + m_data->m_oldHeight = dm.dmPelsHeight; + m_data->m_oldBitsPerPel = dm.dmBitsPerPel; + + if (width && height) + { + dm.dmPelsWidth = width; + dm.dmPelsHeight = height; + } else + { + dm.dmPelsWidth = m_data->m_width; + dm.dmPelsHeight = m_data->m_height; + } + if (colorBitsPerPixel) + { + dm.dmBitsPerPel = colorBitsPerPixel; + } + } else + { + if (m_data->m_oldScreenWidth) + { + dm.dmPelsWidth = m_data->m_oldScreenWidth; + dm.dmPelsHeight= m_data->m_oldHeight; + dm.dmBitsPerPel = m_data->m_oldBitsPerPel; + } + } + + if (fullscreen) + { + res = ChangeDisplaySettings(&dm, CDS_FULLSCREEN); + } else + { + res = ChangeDisplaySettings(&dm, 0); + } +} + + + +Win32OpenGLWindow::Win32OpenGLWindow() +{ + m_data = new InternalData2(); + sData = m_data; +} + +Win32OpenGLWindow::~Win32OpenGLWindow() +{ + delete m_data; + sData = 0; +} + +void Win32OpenGLWindow::init() +{ + init(640,480,false); +} + + +void Win32OpenGLWindow::exit() +{ + disableOpenGL(); + DestroyWindow(this->m_data->m_hWnd); +} + + + + + +void Win32OpenGLWindow::startRendering() +{ + pumpMessage(); + + //glClearColor(1.f,0.f,0.f,1.f); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); //clear buffers + + //glCullFace(GL_BACK); + //glFrontFace(GL_CCW); + glEnable(GL_DEPTH_TEST); + + + float aspect; + //btVector3 extents; + + if (m_data->m_width > m_data->m_height) + { + aspect = (float)m_data->m_width / (float)m_data->m_height; + //extents.setValue(aspect * 1.0f, 1.0f,0); + } else + { + aspect = (float)m_data->m_height / (float)m_data->m_width; + //extents.setValue(1.0f, aspect*1.f,0); + } + + 
glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + + if (m_data->m_width > m_data->m_height) + { + glFrustum (-aspect, aspect, -1.0, 1.0, 1.0, 10000.0); + } else + { + glFrustum (-1.0, 1.0, -aspect, aspect, 1.0, 10000.0); + } + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + +} + + +void Win32OpenGLWindow::renderAllObjects() +{ +} + +void Win32OpenGLWindow::endRendering() +{ + SwapBuffers( m_data->m_hDC ); +} + +float Win32OpenGLWindow::getTimeInSeconds() +{ + return 0.f; +} + +void Win32OpenGLWindow::setDebugMessage(int x,int y,const char* message) +{ +} + +bool Win32OpenGLWindow::requestedExit() +{ + return m_data->m_quit; +} \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/Win32OpenGLRenderManager.h b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/Win32OpenGLRenderManager.h new file mode 100644 index 000000000..861c6f5b6 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/Win32OpenGLRenderManager.h @@ -0,0 +1,70 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Erwin Coumans + + +#ifndef _WIN32_OPENGL_RENDER_MANAGER_H +#define _WIN32_OPENGL_RENDER_MANAGER_H + + +#define RM_DECLARE_HANDLE(name) typedef struct name##__ { int unused; } *name + +RM_DECLARE_HANDLE(RenderObjectHandle); + +struct InternalData2; + +class Win32OpenGLWindow +{ + protected: + + struct InternalData2* m_data; + + void enableOpenGL(); + + void disableOpenGL(); + + void pumpMessage(); + + + +public: + + Win32OpenGLWindow(); + + virtual ~Win32OpenGLWindow(); + + virtual void init(); //default implementation uses default settings for width/height/fullscreen + + void init(int width,int height, bool fullscreen=false, int colorBitsPerPixel=0, void* windowHandle=0); + + void switchFullScreen(bool fullscreen,int width=0,int height=0,int colorBitsPerPixel=0); + + virtual void exit(); + + + virtual void startRendering(); + + virtual void renderAllObjects(); + + virtual void endRendering(); + + virtual float getTimeInSeconds(); + + virtual void setDebugMessage(int x,int y,const char* message); + + virtual bool requestedExit(); + +}; + +#endif //_WIN32_OPENGL_RENDER_MANAGER_H diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/main.cpp b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/main.cpp new file mode 100644 index 000000000..b4de1789f --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/main.cpp @@ -0,0 +1,224 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + +// +//#include "vld.h" +#include + +#include "GLInstancingRenderer.h" + + +#include "GLInstancingRenderer.h" +#include "../opengl_interop/btOpenCLGLInteropBuffer.h" +#include "Win32OpenGLRenderManager.h" +#include "CLPhysicsDemo.h" +#include "../broadphase_benchmark/btGridBroadphaseCl.h" +#include "../../opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h" +#include "ShapeData.h" +#include "LinearMath/btQuickprof.h" + +int NUM_OBJECTS_X = 32; +int NUM_OBJECTS_Y = 24; +int NUM_OBJECTS_Z = 32; + + +float X_GAP = 2.f; +float Y_GAP = 2.f; +float Z_GAP = 2.f; + +extern int numPairsOut; + + +void createScene(GLInstancingRenderer& renderer,CLPhysicsDemo& physicsSim) +{ + int strideInBytes = sizeof(float)*9; + + int barrelShapeIndex = -1; + int cubeShapeIndex = -1; + + float position[4]={0,0,0,0}; + float orn[4] = {0,0,0,1}; + float color[4] = {1,1,1,1}; + int index=0; +#if 1 + { + int numVertices = sizeof(barrel_vertices)/strideInBytes; + int numIndices = sizeof(barrel_indices)/sizeof(int); + barrelShapeIndex = renderer.registerShape(&barrel_vertices[0],numVertices,barrel_indices,numIndices); + } + + + float barrelScaling[4] = {2,2,2,1}; + + + int barrelCollisionShapeIndex = physicsSim.registerCollisionShape(&barrel_vertices[0],strideInBytes, sizeof(barrel_vertices)/strideInBytes,&barrelScaling[0]); + + + + for (int i=0;iinit(1024,768); + GLenum err = glewInit(); + window->startRendering(); + window->endRendering(); + + GLInstancingRenderer render; + + + + + + CLPhysicsDemo demo(window); + + bool useInterop = true; + demo.init(-1,-1,useInterop); + + render.InitShaders(); + + if (useInterop) + 
demo.setupInterop(); + + createScene(render, demo); + + + printf("num objects = %d\n", NUM_OBJECTS_X*NUM_OBJECTS_Y*NUM_OBJECTS_Z); + + + render.writeTransforms(); + + + while (!window->requestedExit()) + { + CProfileManager::Reset(); + + demo.stepSimulation(); + + + window->startRendering(); + render.RenderScene(); + window->endRendering(); + + CProfileManager::Increment_Frame_Counter(); + + static bool printStats = true; + + if (printStats) + { + static int count = 10; + count--; + if (count<0) + { + CProfileManager::dumpAll(); + //printf("total broadphase pairs= %d\n", gFpIO.m_numOverlap); + printf("numPairsOut (culled) = %d\n", numPairsOut); + printStats = false; + } + } + + } + + + demo.cleanup(); + + render.CleanupShaders(); + window->exit(); + delete window; + + + + return 0; +} \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/premake4.lua new file mode 100644 index 000000000..e78f276dc --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline2/premake4.lua @@ -0,0 +1,5 @@ + +include "AMD" +-- include "Intel" +-- include "NVIDIA" + \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/integration/AMD/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/integration/AMD/premake4.lua new file mode 100644 index 000000000..a8b029e81 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/integration/AMD/premake4.lua @@ -0,0 +1,34 @@ + + hasCL = findOpenCL_AMD() + + if (hasCL) then + + project "OpenCL_integration_AMD" + + initOpenCL_AMD() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + + initOpenGL() + initGlut() + initGlew() + + includedirs { + "../../../rendering/BulletMath", + "../../primitives" + } + + files { + "../main.cpp", + "../../basic_initialize/btOpenCLUtils.cpp", + "../../basic_initialize/btOpenCLUtils.h", + "../../opengl_interop/btOpenCLGLInteropBuffer.cpp", + 
"../../opengl_interop/btOpenCLGLInteropBuffer.h", + "../../opengl_interop/btStopwatch.cpp", + "../../opengl_interop/btStopwatch.h" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/integration/Intel/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/integration/Intel/premake4.lua new file mode 100644 index 000000000..27c08660d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/integration/Intel/premake4.lua @@ -0,0 +1,36 @@ + + hasCL = findOpenCL_Intel() + + if (hasCL) then + + project "OpenCL_integration_Intel" + + initOpenCL_Intel() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + + + initOpenGL() + initGlut() + initGlew() + + + includedirs { + "../../../rendering/BulletMath", + "../../primitives" + } + + files { + "../main.cpp", + "../../basic_initialize/btOpenCLUtils.cpp", + "../../basic_initialize/btOpenCLUtils.h", + "../../opengl_interop/btOpenCLGLInteropBuffer.cpp", + "../../opengl_interop/btOpenCLGLInteropBuffer.h", + "../../opengl_interop/btStopwatch.cpp", + "../../opengl_interop/btStopwatch.h" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/integration/NVIDIA/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/integration/NVIDIA/premake4.lua new file mode 100644 index 000000000..b6b5272f1 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/integration/NVIDIA/premake4.lua @@ -0,0 +1,35 @@ + + hasCL = findOpenCL_NVIDIA() + + if (hasCL) then + + project "OpenCL_integration_NVIDIA" + + initOpenCL_NVIDIA() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + + initOpenGL() + initGlut() + initGlew() + + + includedirs { + "../../../rendering/BulletMath", + "../../primitives" + } + + files { + "../main.cpp", + "../../basic_initialize/btOpenCLUtils.cpp", + "../../basic_initialize/btOpenCLUtils.h", + "../../opengl_interop/btOpenCLGLInteropBuffer.cpp", + "../../opengl_interop/btOpenCLGLInteropBuffer.h", + "../../opengl_interop/btStopwatch.cpp", + 
"../../opengl_interop/btStopwatch.h" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/integration/integrateKernel.cl b/Extras/RigidBodyGpuPipeline/opencl/integration/integrateKernel.cl new file mode 100644 index 000000000..fbb16be97 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/integration/integrateKernel.cl @@ -0,0 +1,73 @@ +MSTRINGIFY( + +float4 quatMult(float4 q1, float4 q2) +{ + float4 q; + q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y; + q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z; + q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x; + q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; + return q; +} + +float4 quatNorm(float4 q) +{ + float len = native_sqrt(dot(q, q)); + if(len > 0.f) + { + q *= 1.f / len; + } + else + { + q.x = q.y = q.z = 0.f; + q.w = 1.f; + } + return q; +} + + + +__kernel void + interopKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, + __global float4 *linVel, + __global float4 *pAngVel) +{ + int nodeID = get_global_id(0); + float timeStep = 0.0166666; + + float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254); + + if( nodeID < numNodes ) + { + g_vertexBuffer[nodeID + startOffset/4] += linVel[nodeID]*timeStep; + + // g_vertexBuffer[nodeID + startOffset/4+numNodes] += angVel[nodeID]; + + float4 axis; + float4 angvel = pAngVel[nodeID]; + float fAngle = native_sqrt(dot(angvel, angvel)); + //limit the angular motion + if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD) + { + fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep; + } + if(fAngle < 0.001f) + { + // use Taylor's expansions of sync function + axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle); + } + else + { + // sync(fAngle) = sin(c*fAngle)/t + axis = angvel * ( native_sin(0.5f * fAngle * timeStep) / fAngle); + } + float4 dorn = axis; + dorn.w = native_cos(fAngle * timeStep * 0.5f); + float4 orn0 = 
g_vertexBuffer[nodeID + startOffset/4+numNodes]; + float4 predictedOrn = quatMult(dorn, orn0); + predictedOrn = quatNorm(predictedOrn); + g_vertexBuffer[nodeID + startOffset/4+numNodes]=predictedOrn; + } +} + +); \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/integration/main.cpp b/Extras/RigidBodyGpuPipeline/opencl/integration/main.cpp new file mode 100644 index 000000000..26e44b6bf --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/integration/main.cpp @@ -0,0 +1,1106 @@ + +//starts crashing when more than 32700 objects on my Geforce 260, unless _USE_SUB_DATA is defined (still unstable though) +//runs fine with fewer objects + +#define NUM_OBJECTS_X 327 +#define NUM_OBJECTS_Y 20 +#define NUM_OBJECTS_Z 20 +//#define NUM_OBJECTS_Z 20 + +//#define _USE_SUB_DATA + +//#define NUM_OBJECTS_X 100 +//#define NUM_OBJECTS_Y 100 +//#define NUM_OBJECTS_Z 100 + +///RECREATE_CL_AND_SHADERS_ON_RESIZE will delete and re-create OpenCL and GLSL shaders/buffers at each resize +//#define RECREATE_CL_AND_SHADERS_ON_RESIZE + +/// +/// OpenCL - OpenGL interop example. Updating transforms of many cubes on GPU, without going through main memory/using the PCIe bus +/// Create all OpenGL resources AFTER create OpenCL context! 
+/// + + +#include +#include + +#include "btGlutInclude.h" +#include "../opengl_interop/btStopwatch.h" + + +#include "btVector3.h" +#include "btQuaternion.h" +#include "btMatrix3x3.h" +static float angle(0); + +#include + +#ifdef _WIN32 +#include +#endif + +//OpenCL stuff +#include "../basic_initialize/btOpenCLUtils.h" +#include "../opengl_interop/btOpenCLGLInteropBuffer.h" + +cl_context g_cxMainContext; +cl_command_queue g_cqCommandQue; +cl_device_id g_device; +static const size_t workGroupSize = 128; +cl_mem gLinVelMem; +cl_mem gAngVelMem; + + +btOpenCLGLInteropBuffer* g_interopBuffer = 0; +cl_kernel g_interopKernel; + +////for Adl +#include + +adl::DeviceCL* g_deviceCL=0; + + + +bool useCPU = false; +bool printStats = false; +bool runOpenCLKernels = true; + +#define MSTRINGIFY(A) #A +static char* interopKernelString = +#include "integrateKernel.cl" + +btStopwatch gStopwatch; +int m_glutScreenWidth = 640; +int m_glutScreenHeight= 480; + +bool m_ortho = false; + +static GLuint instancingShader; // The instancing renderer +static GLuint cube_vao; +static GLuint cube_vbo; +static GLuint index_vbo; +static GLuint m_texturehandle; + +static bool done = false; +static GLint angle_loc = 0; +static GLint ModelViewMatrix; +static GLint ProjectionMatrix; + + +static GLint uniform_texture_diffuse = 0; + +//used for dynamic loading from disk (default switched off) +#define MAX_SHADER_LENGTH 8192 +static GLubyte shaderText[MAX_SHADER_LENGTH]; + +static const char* vertexShader= \ +"#version 330\n" +"precision highp float;\n" +"\n" +"\n" +"\n" +"layout (location = 0) in vec4 position;\n" +"layout (location = 1) in vec4 instance_position;\n" +"layout (location = 2) in vec4 instance_quaternion;\n" +"layout (location = 3) in vec2 uvcoords;\n" +"layout (location = 4) in vec3 vertexnormal;\n" +"\n" +"\n" +"uniform float angle = 0.0;\n" +"uniform mat4 ModelViewMatrix;\n" +"uniform mat4 ProjectionMatrix;\n" +"\n" +"out Fragment\n" +"{\n" +" vec4 color;\n" +"} fragment;\n" +"\n" +"out 
Vert\n" +"{\n" +" vec2 texcoord;\n" +"} vert;\n" +"\n" +"\n" +"vec4 quatMul ( in vec4 q1, in vec4 q2 )\n" +"{\n" +" vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n" +" vec4 dt = q1 * q2;\n" +" float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n" +" return vec4 ( im, re );\n" +"}\n" +"\n" +"vec4 quatFromAxisAngle(vec4 axis, in float angle)\n" +"{\n" +" float cah = cos(angle*0.5);\n" +" float sah = sin(angle*0.5);\n" +" float d = inversesqrt(dot(axis,axis));\n" +" vec4 q = vec4(axis.x*sah*d,axis.y*sah*d,axis.z*sah*d,cah);\n" +" return q;\n" +"}\n" +"//\n" +"// vector rotation via quaternion\n" +"//\n" +"vec4 quatRotate3 ( in vec3 p, in vec4 q )\n" +"{\n" +" vec4 temp = quatMul ( q, vec4 ( p, 0.0 ) );\n" +" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" +"}\n" +"vec4 quatRotate ( in vec4 p, in vec4 q )\n" +"{\n" +" vec4 temp = quatMul ( q, p );\n" +" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" +"}\n" +"\n" +"out vec3 lightDir,normal,ambient;\n" +"\n" +"void main(void)\n" +"{\n" +" vec4 q = instance_quaternion;\n" +" ambient = vec3(0.2,0.2,0.2);\n" +" \n" +" \n" +" vec4 local_normal = (quatRotate3( vertexnormal,q));\n" +" vec3 light_pos = vec3(1000,1000,1000);\n" +" normal = normalize(ModelViewMatrix * local_normal).xyz;\n" +"\n" +" lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n" +"// lightDir = normalize(vec3(gl_LightSource[0].position));\n" +" \n" +" vec4 axis = vec4(1,1,1,0);\n" +" vec4 localcoord = quatRotate3( position.xyz,q);\n" +" vec4 vertexPos = ProjectionMatrix * ModelViewMatrix *(instance_position+localcoord);\n" +"\n" +" gl_Position = vertexPos;\n" +" \n" +"// fragment.color = instance_color;\n" +" vert.texcoord = uvcoords;\n" +"}\n" +; + + +static const char* fragmentShader= \ +"#version 330\n" +"precision highp float;\n" +"\n" +"in Fragment\n" +"{\n" +" vec4 color;\n" +"} fragment;\n" +"\n" +"in Vert\n" +"{\n" +" vec2 texcoord;\n" +"} vert;\n" +"\n" +"uniform sampler2D 
Diffuse;\n"
+"\n"
+"in vec3 lightDir,normal,ambient;\n"
+"\n"
+"out vec4 color;\n"
+"\n"
+"void main_textured(void)\n"
+"{\n"
+" color = texture2D(Diffuse,vert.texcoord);//fragment.color;\n"
+"}\n"
+"\n"
+"void main(void)\n"
+"{\n"
+" vec4 texel = texture2D(Diffuse,vert.texcoord);//fragment.color;\n"
+" vec3 ct,cf;\n"
+" float intensity,at,af;\n"
+" intensity = max(dot(lightDir,normalize(normal)),0.0);\n"
+" cf = intensity*vec3(1.0,1.0,1.0);//intensity * (gl_FrontMaterial.diffuse).rgb+ambient;//gl_FrontMaterial.ambient.rgb;\n"
+" af = 1.0;\n"
+" \n"
+" ct = texel.rgb;\n"
+" at = texel.a;\n"
+" \n"
+" color = vec4(ct * cf, at * af); \n"
+"}\n"
+;
+
+
+// Load the shader from the source text
+void gltLoadShaderSrc(const char *szShaderSrc, GLuint shader)
+{
+	GLchar *fsStringPtr[1];
+
+	fsStringPtr[0] = (GLchar *)szShaderSrc;
+	glShaderSource(shader, 1, (const GLchar **)fsStringPtr, NULL);
+}
+
+
+////////////////////////////////////////////////////////////////
+// Load the shader from the specified file into the shared shaderText buffer and hand it to
+// 'shader'. Returns false if the file cannot be opened or needs MAX_SHADER_LENGTH or more characters.
+bool gltLoadShaderFile(const char *szFile, GLuint shader)
+{
+	GLint shaderLength = 0;
+	FILE *fp;
+
+	// Open the shader file
+	fp = fopen(szFile, "r");
+	if(fp != NULL)
+	{
+		// See how long the file is
+		while (fgetc(fp) != EOF)
+			shaderLength++;
+
+		// Allocate a block of memory to send in the shader
+		assert(shaderLength < MAX_SHADER_LENGTH); // make me bigger!
+		if(shaderLength >= MAX_SHADER_LENGTH) // >= : one byte must stay free for the terminating '\0' written below
+		{
+			fclose(fp);
+			return false;
+		}
+
+		// Go back to beginning of file
+		rewind(fp);
+
+		// Read the whole file in
+		// shaderText is a static array (never NULL); keep the count fread really delivered so a short read is still terminated correctly
+		shaderLength = (GLint)fread(shaderText, 1, shaderLength, fp);
+
+		// Make sure it is null terminated and close the file
+		shaderText[shaderLength] = '\0';
+		fclose(fp);
+	}
+	else
+		return false;
+
+	// printf(shaderText);
+	// Load the string
+	gltLoadShaderSrc((const char *)shaderText, shader);
+
+	return true;
+}
+
+
+/////////////////////////////////////////////////////////////////
+// Load a pair of shaders, compile, and link together. Specify the complete
+// file path for each shader. Note, there is no support for
+// just loading say a vertex program... you have to do both.
+GLuint gltLoadShaderPair(const char *szVertexProg, const char *szFragmentProg, bool loadFromFile)
+{
+	// Temporary Shader objects
+	GLuint hVertexShader;
+	GLuint hFragmentShader;
+	GLuint hReturn = 0;
+	GLint testVal;
+
+	// Create shader objects
+	hVertexShader = glCreateShader(GL_VERTEX_SHADER);
+	hFragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
+
+	if (loadFromFile)
+	{
+
+		if(gltLoadShaderFile(szVertexProg, hVertexShader) == false)
+		{
+			glDeleteShader(hVertexShader);
+			glDeleteShader(hFragmentShader);
+			return (GLuint)NULL;
+		}
+
+		if(gltLoadShaderFile(szFragmentProg, hFragmentShader) == false)
+		{
+			glDeleteShader(hVertexShader);
+			glDeleteShader(hFragmentShader);
+			return (GLuint)NULL;
+		}
+	} else
+	{
+		gltLoadShaderSrc(vertexShader, hVertexShader);
+		gltLoadShaderSrc(fragmentShader, hFragmentShader);
+	}
+	// Compile them
+	glCompileShader(hVertexShader);
+	glCompileShader(hFragmentShader);
+
+	// Check for errors
+	glGetShaderiv(hVertexShader, GL_COMPILE_STATUS, &testVal);
+	if(testVal == GL_FALSE)
+	{
+		char temp[256] = "";
+		glGetShaderInfoLog( hVertexShader, 256, NULL, temp);
+		fprintf( stderr, "Compile failed:\n%s\n", temp);
+		assert(0);
+		exit(0);
+		glDeleteShader(hVertexShader);
+
glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + glGetShaderiv(hFragmentShader, GL_COMPILE_STATUS, &testVal); + if(testVal == GL_FALSE) + { + char temp[256] = ""; + glGetShaderInfoLog( hFragmentShader, 256, NULL, temp); + fprintf( stderr, "Compile failed:\n%s\n", temp); + assert(0); + exit(0); + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + // Link them - assuming it works... + hReturn = glCreateProgram(); + glAttachShader(hReturn, hVertexShader); + glAttachShader(hReturn, hFragmentShader); + + glLinkProgram(hReturn); + + // These are no longer needed + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + + // Make sure link worked too + glGetProgramiv(hReturn, GL_LINK_STATUS, &testVal); + if(testVal == GL_FALSE) + { + glDeleteProgram(hReturn); + return (GLuint)NULL; + } + + return hReturn; +} + +///position xyz, unused w, normal, uv +static const GLfloat cube_vertices[] = +{ + -1.0f, -1.0f, 1.0f, 0.0f, 0,0,1, 0,0,//0 + 1.0f, -1.0f, 1.0f, 0.0f, 0,0,1, 1,0,//1 + 1.0f, 1.0f, 1.0f, 0.0f, 0,0,1, 1,1,//2 + -1.0f, 1.0f, 1.0f, 0.0f, 0,0,1, 0,1 ,//3 + + -1.0f, -1.0f, -1.0f, 1.0f, 0,0,-1, 0,0,//4 + 1.0f, -1.0f, -1.0f, 1.0f, 0,0,-1, 1,0,//5 + 1.0f, 1.0f, -1.0f, 1.0f, 0,0,-1, 1,1,//6 + -1.0f, 1.0f, -1.0f, 1.0f, 0,0,-1, 0,1,//7 + + -1.0f, -1.0f, -1.0f, 1.0f, -1,0,0, 0,0, + -1.0f, 1.0f, -1.0f, 1.0f, -1,0,0, 1,0, + -1.0f, 1.0f, 1.0f, 1.0f, -1,0,0, 1,1, + -1.0f, -1.0f, 1.0f, 1.0f, -1,0,0, 0,1, + + 1.0f, -1.0f, -1.0f, 1.0f, 1,0,0, 0,0, + 1.0f, 1.0f, -1.0f, 1.0f, 1,0,0, 1,0, + 1.0f, 1.0f, 1.0f, 1.0f, 1,0,0, 1,1, + 1.0f, -1.0f, 1.0f, 1.0f, 1,0,0, 0,1, + + -1.0f, -1.0f, -1.0f, 1.0f, 0,-1,0, 0,0, + -1.0f, -1.0f, 1.0f, 1.0f, 0,-1,0, 1,0, + 1.0f, -1.0f, 1.0f, 1.0f, 0,-1,0, 1,1, + 1.0f,-1.0f, -1.0f, 1.0f, 0,-1,0, 0,1, + + -1.0f, 1.0f, -1.0f, 1.0f, 0,1,0, 0,0, + -1.0f, 1.0f, 1.0f, 1.0f, 0,1,0, 1,0, + 1.0f, 1.0f, 1.0f, 1.0f, 0,1,0, 1,1, + 1.0f,1.0f, -1.0f, 1.0f, 0,1,0, 0,1, +}; + +static const int 
cube_indices[]= +{ + 0,1,2,0,2,3,//ground face + 4,5,6,4,6,7,//top face + 8,9,10,8,10,11, + 12,13,14,12,14,15, + 16,17,18,16,18,19, + 20,21,22,20,22,23 +}; + + + + + +void DeleteCL() +{ + clReleaseContext(g_cxMainContext); + clReleaseCommandQueue(g_cqCommandQue); +} + +void InitCL() +{ + void* glCtx=0; + void* glDC = 0; + +#ifdef _WIN32 + glCtx = wglGetCurrentContext(); +#else //!_WIN32 + GLXContext glCtx = glXGetCurrentContext(); +#endif //!_WIN32 + glDC = wglGetCurrentDC(); + + int ciErrNum = 0; + cl_device_type deviceType = CL_DEVICE_TYPE_ALL;//GPU; + g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext); + + if (numDev>0) + { + g_device= btOpenCLUtils::getDevice(g_cxMainContext,0); + btOpenCLDeviceInfo clInfo; + btOpenCLUtils::getDeviceInfo(g_device,clInfo); + btOpenCLUtils::printDeviceInfo(g_device); + // create a command-queue + g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + //normally you would create and execute kernels using this command queue + + } + + +} + +#define NUM_OBJECTS (NUM_OBJECTS_X*NUM_OBJECTS_Y*NUM_OBJECTS_Z) +#define POSITION_BUFFER_SIZE (NUM_OBJECTS*sizeof(float)*4) +#define ORIENTATION_BUFFER_SIZE (NUM_OBJECTS*sizeof(float)*4) + + +GLfloat* instance_positions_ptr = 0; +GLfloat* instance_quaternion_ptr = 0; + +void DeleteShaders() +{ + glDeleteVertexArrays(1, &cube_vao); + glDeleteBuffers(1,&index_vbo); + glDeleteBuffers(1,&cube_vbo); + glDeleteProgram(instancingShader); +} + +void writeTransforms() +{ + + + glFlush(); + char* bla = (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY + + float* positions = (float*)(bla+sizeof(cube_vertices)); + float* orientations = (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE); + // positions[0]+=0.001f; + + static int offset=0; + //offset++; + + static btVector3 
axis(1,0,0); + angle += 0.01f; + int index=0; + btQuaternion orn(axis,angle); + for (int i=0;i m_glutScreenHeight) + { + aspect = m_glutScreenWidth / (float)m_glutScreenHeight; + extents.setValue(aspect * 1.0f, 1.0f,0); + } else + { + aspect = m_glutScreenHeight / (float)m_glutScreenWidth; + extents.setValue(1.0f, aspect*1.f,0); + } + + + if (m_ortho) + { + // reset matrix + glLoadIdentity(); + extents *= m_cameraDistance; + btVector3 lower = m_cameraTargetPosition - extents; + btVector3 upper = m_cameraTargetPosition + extents; + glOrtho(lower.getX(), upper.getX(), lower.getY(), upper.getY(),-1000,1000); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + } else + { + if (m_glutScreenWidth > m_glutScreenHeight) + { + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + } else + { + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + } + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + gluLookAt(m_cameraPosition[0], m_cameraPosition[1], m_cameraPosition[2], + m_cameraTargetPosition[0], m_cameraTargetPosition[1], m_cameraTargetPosition[2], + m_cameraUp.getX(),m_cameraUp.getY(),m_cameraUp.getZ()); + } + +} + + + +void myinit() +{ + + + // GLfloat light_ambient[] = { btScalar(0.2), btScalar(0.2), btScalar(0.2), btScalar(1.0) }; + GLfloat light_ambient[] = { btScalar(1.0), btScalar(1.2), btScalar(0.2), btScalar(1.0) }; + + GLfloat light_diffuse[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0) }; + GLfloat light_specular[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0 )}; + /* light_position is NOT default value */ + GLfloat light_position0[] = { btScalar(1000.0), btScalar(1000.0), btScalar(1000.0), btScalar(0.0 )}; + GLfloat light_position1[] = { btScalar(-1.0), btScalar(-10.0), btScalar(-1.0), btScalar(0.0) }; + + glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient); + 
glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT0, GL_POSITION, light_position0); + + glLightfv(GL_LIGHT1, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT1, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT1, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT1, GL_POSITION, light_position1); + + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + glEnable(GL_LIGHT1); + + + // glShadeModel(GL_FLAT);//GL_SMOOTH); + glShadeModel(GL_SMOOTH); + + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + + glClearColor(float(0.7),float(0.7),float(0.7),float(0)); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + + + static bool m_textureenabled = true; + static bool m_textureinitialized = false; + + + if(m_textureenabled) + { + if(!m_textureinitialized) + { + glActiveTexture(GL_TEXTURE0); + + GLubyte* image=new GLubyte[256*256*3]; + for(int y=0;y<256;++y) + { + const int t=y>>5; + GLubyte* pi=image+y*256*3; + for(int x=0;x<256;++x) + { + const int s=x>>5; + const GLubyte b=180; + GLubyte c=b+((s+t&1)&1)*(255-b); + pi[0]=255; + pi[1]=c; + pi[2]=c; + pi+=3; + } + } + + glGenTextures(1,(GLuint*)&m_texturehandle); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + glTexEnvf(GL_TEXTURE_ENV,GL_TEXTURE_ENV_MODE,GL_MODULATE); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT); + gluBuild2DMipmaps(GL_TEXTURE_2D,3,256,256,GL_RGB,GL_UNSIGNED_BYTE,image); + delete[] image; + m_textureinitialized=true; + } + // glMatrixMode(GL_TEXTURE); + // glLoadIdentity(); + // glMatrixMode(GL_MODELVIEW); + + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + + } else + { + glDisable(GL_TEXTURE_2D); + } + + glEnable(GL_COLOR_MATERIAL); + + + // glEnable(GL_CULL_FACE); + // 
glCullFace(GL_BACK); +} + +//#pragma optimize( "g", off ) + +void updatePos() +{ + + + if (useCPU) + { + int index=0; + for (int i=0;igetCLBUffer(); + cl_int ciErrNum = CL_SUCCESS; + ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + if (runOpenCLKernels) + { + int numObjects = NUM_OBJECTS; + int offset = (sizeof(cube_vertices) )/4; + + ciErrNum = clSetKernelArg(g_interopKernel, 0, sizeof(int), &offset); + ciErrNum = clSetKernelArg(g_interopKernel, 1, sizeof(int), &numObjects); + ciErrNum = clSetKernelArg(g_interopKernel, 2, sizeof(cl_mem), (void*)&clBuffer ); + + ciErrNum = clSetKernelArg(g_interopKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem); + ciErrNum = clSetKernelArg(g_interopKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem); + + size_t numWorkItems = workGroupSize*((NUM_OBJECTS + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_interopKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + + ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + clFinish(g_cqCommandQue); + + } + +} +//#pragma optimize( "g", on ) + +void RenderScene(void) +{ + +#if 0 + float modelview[20]={0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9}; + // get the current modelview matrix + glGetFloatv(GL_MODELVIEW_MATRIX , modelview); + float projection[20]={0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9}; + glGetFloatv(GL_PROJECTION_MATRIX, projection); +#endif + + myinit(); + + updateCamera(); + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + //render coordinate system + glBegin(GL_LINES); + glColor3f(1,0,0); + glVertex3f(0,0,0); + glVertex3f(1,0,0); + glColor3f(0,1,0); + glVertex3f(0,0,0); + glVertex3f(0,1,0); + glColor3f(0,0,1); + glVertex3f(0,0,0); + glVertex3f(0,0,1); + glEnd(); + + //do a finish, to make sure timings are clean + // glFinish(); + + float start = 
gStopwatch.getTimeMilliseconds(); + + // glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + glFlush(); + updatePos(); + + float stop = gStopwatch.getTimeMilliseconds(); + gStopwatch.reset(); + + if (printStats) + { + printf("updatePos=%f ms on ",stop-start); + + if (useCPU) + { + printf("CPU \n"); + } else + { + printf("OpenCL "); + if (runOpenCLKernels) + printf("running the kernels"); + else + printf("without running the kernels"); + printf("\n"); + } + } + + glBindVertexArray(cube_vao); + + glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 9*sizeof(float), 0); + glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices))); + glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices)+POSITION_BUFFER_SIZE)); + int uvoffset = 7*sizeof(float); + int normaloffset = 4*sizeof(float); + + glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)uvoffset); + glVertexAttribPointer(4, 3, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)normaloffset); + + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glEnableVertexAttribArray(4); + + glVertexAttribDivisor(1, 1); + glVertexAttribDivisor(2, 1); + glVertexAttribDivisor(3, 0); + glVertexAttribDivisor(4, 0); + + glUseProgram(instancingShader); + glUniform1f(angle_loc, 0); + GLfloat pm[16]; + glGetFloatv(GL_PROJECTION_MATRIX, pm); + glUniformMatrix4fv(ProjectionMatrix, 1, false, &pm[0]); + + GLfloat mvm[16]; + glGetFloatv(GL_MODELVIEW_MATRIX, mvm); + glUniformMatrix4fv(ModelViewMatrix, 1, false, &mvm[0]); + + glUniform1i(uniform_texture_diffuse, 0); + + glFlush(); + int numInstances = NUM_OBJECTS; + int indexCount = sizeof(cube_indices)/sizeof(int); + int indexOffset = 0; + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_vbo); + glDrawElementsInstanced(GL_TRIANGLES, indexCount, GL_UNSIGNED_INT, (void*)indexOffset, numInstances); + + glUseProgram(0); + 
glBindBuffer(GL_ARRAY_BUFFER,0); + glBindVertexArray(0); + + glutSwapBuffers(); + glutPostRedisplay(); + + GLint err = glGetError(); + assert(err==GL_NO_ERROR); +} + + +void ChangeSize(int w, int h) +{ + m_glutScreenWidth = w; + m_glutScreenHeight = h; + +#ifdef RECREATE_CL_AND_SHADERS_ON_RESIZE + delete g_interopBuffer; + clReleaseKernel(g_interopKernel); + DeleteCL(); + DeleteShaders(); +#endif //RECREATE_CL_AND_SHADERS_ON_RESIZE + + // Set Viewport to window dimensions + glViewport(0, 0, w, h); + +#ifdef RECREATE_CL_AND_SHADERS_ON_RESIZE + InitCL(); + InitShaders(); + + g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo); + clFinish(g_cqCommandQue); + g_interopKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, interopKernelString, "interopKernel" ); +#endif //RECREATE_CL_AND_SHADERS_ON_RESIZE + +} + +void Keyboard(unsigned char key, int x, int y) +{ + switch (key) + { + case 27: + done = true; + break; + case 'O': + case 'o': + { + m_ortho = !m_ortho; + break; + } + case 'c': + case 'C': + { + useCPU = !useCPU; + if (useCPU) + printf("using CPU\n"); + else + printf("using OpenCL\n"); + break; + } + case 's': + case 'S': + { + printStats = !printStats; + break; + } + case 'k': + case 'K': + { + runOpenCLKernels=!runOpenCLKernels; + break; + } + case 'q': + case 'Q': + exit(0); + default: + break; + } +} + +// Cleanup +void ShutdownRC(void) +{ + glDeleteBuffers(1, &cube_vbo); + glDeleteVertexArrays(1, &cube_vao); +} + +int main(int argc, char* argv[]) +{ + // printf("vertexShader = \n%s\n",vertexShader); + // printf("fragmentShader = \n%s\n",fragmentShader); + + glutInit(&argc, argv); + + glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); + + + glutInitWindowSize(m_glutScreenWidth, m_glutScreenHeight); + char buf[1024]; + sprintf(buf,"OpenCL - OpenGL interop, transforms %d cubes on the GPU (use c to toggle CPU/CL)", NUM_OBJECTS); + glutCreateWindow(buf); + + glutReshapeFunc(ChangeSize); + + glutKeyboardFunc(Keyboard); 
+ glutDisplayFunc(RenderScene); + + GLenum err = glewInit(); + if (GLEW_OK != err) + { + /* Problem: glewInit failed, something is seriously wrong. */ + fprintf(stderr, "Error: %s\n", glewGetErrorString(err)); + } + + //ChangeSize(m_glutScreenWidth,m_glutScreenHeight); + + InitCL(); + + +#define CUSTOM_CL_INITIALIZATION +#ifdef CUSTOM_CL_INITIALIZATION + g_deviceCL = new adl::DeviceCL(); + g_deviceCL->m_deviceIdx = g_device; + g_deviceCL->m_context = g_cxMainContext; + g_deviceCL->m_commandQueue = g_cqCommandQue; + +#else + DeviceUtils::Config cfg; + cfg.m_type = DeviceUtils::Config::DEVICE_CPU; + g_deviceCL = DeviceUtils::allocate( TYPE_CL, cfg ); +#endif + + int size = NUM_OBJECTS; + adl::Buffer linvelBuf( g_deviceCL, size ); + adl::Buffer angvelBuf( g_deviceCL, size ); + + gLinVelMem = (cl_mem)linvelBuf.m_ptr; + gAngVelMem = (cl_mem)angvelBuf.m_ptr; + + btVector3* linVelHost= new btVector3[size]; + btVector3* angVelHost = new btVector3[size]; + + for (int i=0;i +#include +#include +#endif + +#if defined (SUNOS) || defined (__SUNOS__) +#include +#endif + +#if defined(WIN32) || defined(_WIN32) + +#define BT_USE_WINDOWS_TIMERS +#define WIN32_LEAN_AND_MEAN +#define NOWINRES +#define NOMCX +#define NOIME + +#ifdef _XBOX + #include +#else //_XBOX + #include +#endif //_XBOX + +#include + + +#else //_WIN32 +#include +#endif //_WIN32 + +#define mymin(a,b) (a > b ? 
a : b) + +struct btStopwatchData +{ + +#ifdef BT_USE_WINDOWS_TIMERS + LARGE_INTEGER mClockFrequency; + DWORD mStartTick; + LONGLONG mPrevElapsedTime; + LARGE_INTEGER mStartTime; +#else +#ifdef __CELLOS_LV2__ + uint64_t mStartTime; +#else + struct timeval mStartTime; +#endif +#endif //__CELLOS_LV2__ + +}; + + +btStopwatch::btStopwatch() +{ + m_data = new btStopwatchData; +#ifdef BT_USE_WINDOWS_TIMERS + QueryPerformanceFrequency(&m_data->mClockFrequency); +#endif + reset(); +} + +btStopwatch::~btStopwatch() +{ + delete m_data; +} + +btStopwatch::btStopwatch(const btStopwatch& other) +{ + m_data = new btStopwatchData; + *m_data = *other.m_data; +} + +btStopwatch& btStopwatch::operator=(const btStopwatch& other) +{ + *m_data = *other.m_data; + return *this; +} + + + /// Resets the initial reference time. +void btStopwatch::reset() +{ +#ifdef BT_USE_WINDOWS_TIMERS + QueryPerformanceCounter(&m_data->mStartTime); + m_data->mStartTick = GetTickCount(); + m_data->mPrevElapsedTime = 0; +#else +#ifdef __CELLOS_LV2__ + + typedef uint64_t ClockSize; + ClockSize newTime; + //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); + SYS_TIMEBASE_GET( newTime ); + m_data->mStartTime = newTime; +#else + gettimeofday(&m_data->mStartTime, 0); +#endif +#endif +} + +/// Returns the time in ms since the last call to reset or since +/// the btStopwatch was created. +float btStopwatch::getTimeMilliseconds() +{ + return getTimeMicroseconds()/1000.f; +} + + /// Returns the time in us since the last call to reset or since + /// the stopwatch was created. +unsigned long int btStopwatch::getTimeMicroseconds() +{ +#ifdef BT_USE_WINDOWS_TIMERS + LARGE_INTEGER currentTime; + QueryPerformanceCounter(¤tTime); + LONGLONG elapsedTime = currentTime.QuadPart - m_data->mStartTime.QuadPart; + + // Compute the number of millisecond ticks elapsed. 
+ unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / m_data->mClockFrequency.QuadPart); + + // Check for unexpected leaps in the Win32 performance counter. + // (This is caused by unexpected data across the PCI to ISA + // bridge, aka south bridge. See Microsoft KB274323.) + unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick; + signed long msecOff = (signed long)(msecTicks - elapsedTicks); + if (msecOff < -100 || msecOff > 100) + { + // Adjust the starting time forwards. + LONGLONG msecAdjustment = mymin(msecOff * + m_data->mClockFrequency.QuadPart / 1000, elapsedTime - + m_data->mPrevElapsedTime); + m_data->mStartTime.QuadPart += msecAdjustment; + elapsedTime -= msecAdjustment; + } + + // Store the current elapsed time for adjustments next time. + m_data->mPrevElapsedTime = elapsedTime; + + // Convert to microseconds. + unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime / + m_data->mClockFrequency.QuadPart); + + return usecTicks; +#else + +#ifdef __CELLOS_LV2__ + uint64_t freq=sys_time_get_timebase_frequency(); + double dFreq=((double) freq)/ 1000000.0; + typedef uint64_t ClockSize; + ClockSize newTime; + //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); + SYS_TIMEBASE_GET( newTime ); + + return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq); +#else + + struct timeval currentTime; + gettimeofday(¤tTime, 0); + return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 + (currentTime.tv_usec - m_data->mStartTime.tv_usec); +#endif//__CELLOS_LV2__ +#endif +} + + + diff --git a/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/btStopwatch.h b/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/btStopwatch.h new file mode 100644 index 000000000..19e8d3722 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/btStopwatch.h @@ -0,0 +1,45 @@ +/* +Stopwatch for timing and profiling for the Bullet Physics Library, http://bulletphysics.org +Copyright (c) 2003-2011 Erwin Coumans + 
+This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef BT_STOPWATCH_H +#define BT_STOPWATCH_H + +///The btStopwatch is a portable basic clock that measures real-time, use for profiling etc. +class btStopwatch +{ +public: + btStopwatch(); + + btStopwatch(const btStopwatch& other); + btStopwatch& operator=(const btStopwatch& other); + + ~btStopwatch(); + + /// Resets the initial reference time. + void reset(); + + /// Returns the time in ms since the last call to reset or since + /// the btStopwatch was created. + float getTimeMilliseconds(); + + /// Returns the time in us since the last call to reset or since + /// the Clock was created. 
+ unsigned long int getTimeMicroseconds(); +private: + struct btStopwatchData* m_data; +}; + + +#endif //BT_STOPWATCH_H \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/interopKernel.cl b/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/interopKernel.cl new file mode 100644 index 000000000..e65da56dc --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/interopKernel.cl @@ -0,0 +1,13 @@ +MSTRINGIFY( + +__kernel void +interopKernel( const int startOffset, const int numNodes, __global float *g_vertexBuffer) +{ + int nodeID = get_global_id(0); + if( nodeID < numNodes ) + { + g_vertexBuffer[nodeID*4 + startOffset+1] += 0.01; + } +} + +); \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/main.cpp b/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/main.cpp new file mode 100644 index 000000000..9ee3921b5 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/main.cpp @@ -0,0 +1,1057 @@ + +//starts crashing when more than 32700 objects on my Geforce 260, unless _USE_SUB_DATA is defined (still unstable though) +//runs fine with fewer objects + +#define NUM_OBJECTS_X 327 +#define NUM_OBJECTS_Y 10 +#define NUM_OBJECTS_Z 10 +//#define NUM_OBJECTS_Z 20 + +//#define _USE_SUB_DATA + +//#define NUM_OBJECTS_X 100 +//#define NUM_OBJECTS_Y 100 +//#define NUM_OBJECTS_Z 100 + +///RECREATE_CL_AND_SHADERS_ON_RESIZE will delete and re-create OpenCL and GLSL shaders/buffers at each resize +//#define RECREATE_CL_AND_SHADERS_ON_RESIZE + +/// +/// OpenCL - OpenGL interop example. Updating transforms of many cubes on GPU, without going through main memory/using the PCIe bus +/// Create all OpenGL resources AFTER create OpenCL context! 
+/// + + +#include +#include + +#include "btGlutInclude.h" +#include "btStopwatch.h" + + +#include "btVector3.h" +#include "btQuaternion.h" +#include "btMatrix3x3.h" +static float angle(0); + +#include + +#ifdef _WIN32 +#include +#endif + +//OpenCL stuff +#include "../basic_initialize/btOpenCLUtils.h" +#include "btOpenCLGLInteropBuffer.h" + +cl_context g_cxMainContext; +cl_command_queue g_cqCommandQue; +cl_device_id g_device; +static const size_t workGroupSize = 128; + + +btOpenCLGLInteropBuffer* g_interopBuffer = 0; +cl_kernel g_interopKernel; + +bool useCPU = false; +bool printStats = false; +bool runOpenCLKernels = true; + +#define MSTRINGIFY(A) #A +static char* interopKernelString = +#include "interopKernel.cl" + +btStopwatch gStopwatch; +int m_glutScreenWidth = 640; +int m_glutScreenHeight= 480; + +bool m_ortho = false; + +static GLuint instancingShader; // The instancing renderer +static GLuint cube_vao; +static GLuint cube_vbo; +static GLuint index_vbo; +static GLuint m_texturehandle; + +static bool done = false; +static GLint angle_loc = 0; +static GLint ModelViewMatrix; +static GLint ProjectionMatrix; + + +static GLint uniform_texture_diffuse = 0; + +//used for dynamic loading from disk (default switched off) +#define MAX_SHADER_LENGTH 8192 +static GLubyte shaderText[MAX_SHADER_LENGTH]; + +static const char* vertexShader= \ +"#version 330\n" +"precision highp float;\n" +"\n" +"\n" +"\n" +"layout (location = 0) in vec4 position;\n" +"layout (location = 1) in vec4 instance_position;\n" +"layout (location = 2) in vec4 instance_quaternion;\n" +"layout (location = 3) in vec2 uvcoords;\n" +"layout (location = 4) in vec3 vertexnormal;\n" +"\n" +"\n" +"uniform float angle = 0.0;\n" +"uniform mat4 ModelViewMatrix;\n" +"uniform mat4 ProjectionMatrix;\n" +"\n" +"out Fragment\n" +"{\n" +" vec4 color;\n" +"} fragment;\n" +"\n" +"out Vert\n" +"{\n" +" vec2 texcoord;\n" +"} vert;\n" +"\n" +"\n" +"vec4 quatMul ( in vec4 q1, in vec4 q2 )\n" +"{\n" +" vec3 im = q1.w * 
q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n" +" vec4 dt = q1 * q2;\n" +" float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n" +" return vec4 ( im, re );\n" +"}\n" +"\n" +"vec4 quatFromAxisAngle(vec4 axis, in float angle)\n" +"{\n" +" float cah = cos(angle*0.5);\n" +" float sah = sin(angle*0.5);\n" +" float d = inversesqrt(dot(axis,axis));\n" +" vec4 q = vec4(axis.x*sah*d,axis.y*sah*d,axis.z*sah*d,cah);\n" +" return q;\n" +"}\n" +"//\n" +"// vector rotation via quaternion\n" +"//\n" +"vec4 quatRotate3 ( in vec3 p, in vec4 q )\n" +"{\n" +" vec4 temp = quatMul ( q, vec4 ( p, 0.0 ) );\n" +" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" +"}\n" +"vec4 quatRotate ( in vec4 p, in vec4 q )\n" +"{\n" +" vec4 temp = quatMul ( q, p );\n" +" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" +"}\n" +"\n" +"out vec3 lightDir,normal,ambient;\n" +"\n" +"void main(void)\n" +"{\n" +" vec4 q = instance_quaternion;\n" +" ambient = vec3(0.2,0.2,0.2);\n" +" \n" +" \n" +" vec4 local_normal = (quatRotate3( vertexnormal,q));\n" +" vec3 light_pos = vec3(1000,1000,1000);\n" +" normal = normalize(ModelViewMatrix * local_normal).xyz;\n" +"\n" +" lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n" +"// lightDir = normalize(vec3(gl_LightSource[0].position));\n" +" \n" +" vec4 axis = vec4(1,1,1,0);\n" +" vec4 localcoord = quatRotate3( position.xyz,q);\n" +" vec4 vertexPos = ProjectionMatrix * ModelViewMatrix *(instance_position+localcoord);\n" +"\n" +" gl_Position = vertexPos;\n" +" \n" +"// fragment.color = instance_color;\n" +" vert.texcoord = uvcoords;\n" +"}\n" +; + + +static const char* fragmentShader= \ +"#version 330\n" +"precision highp float;\n" +"\n" +"in Fragment\n" +"{\n" +" vec4 color;\n" +"} fragment;\n" +"\n" +"in Vert\n" +"{\n" +" vec2 texcoord;\n" +"} vert;\n" +"\n" +"uniform sampler2D Diffuse;\n" +"\n" +"in vec3 lightDir,normal,ambient;\n" +"\n" +"out vec4 color;\n" +"\n" +"void main_textured(void)\n" +"{\n" +" color = 
texture2D(Diffuse,vert.texcoord);//fragment.color;\n" +"}\n" +"\n" +"void main(void)\n" +"{\n" +" vec4 texel = texture2D(Diffuse,vert.texcoord);//fragment.color;\n" +" vec3 ct,cf;\n" +" float intensity,at,af;\n" +" intensity = max(dot(lightDir,normalize(normal)),0.0);\n" +" cf = intensity*vec3(1.0,1.0,1.0);//intensity * (gl_FrontMaterial.diffuse).rgb+ambient;//gl_FrontMaterial.ambient.rgb;\n" +" af = 1.0;\n" +" \n" +" ct = texel.rgb;\n" +" at = texel.a;\n" +" \n" +" color = vec4(ct * cf, at * af); \n" +"}\n" +; + + +// Load the shader from the source text +void gltLoadShaderSrc(const char *szShaderSrc, GLuint shader) +{ + GLchar *fsStringPtr[1]; + + fsStringPtr[0] = (GLchar *)szShaderSrc; + glShaderSource(shader, 1, (const GLchar **)fsStringPtr, NULL); +} + + +//////////////////////////////////////////////////////////////// +// Load the shader from the specified file. Returns false if the +// shader could not be loaded +bool gltLoadShaderFile(const char *szFile, GLuint shader) +{ + GLint shaderLength = 0; + FILE *fp; + + // Open the shader file + fp = fopen(szFile, "r"); + if(fp != NULL) + { + // See how long the file is + while (fgetc(fp) != EOF) + shaderLength++; + + // Allocate a block of memory to send in the shader + assert(shaderLength < MAX_SHADER_LENGTH); // make me bigger! + if(shaderLength > MAX_SHADER_LENGTH) + { + fclose(fp); + return false; + } + + // Go back to beginning of file + rewind(fp); + + // Read the whole file in + if (shaderText != NULL) + fread(shaderText, 1, shaderLength, fp); + + // Make sure it is null terminated and close the file + shaderText[shaderLength] = '\0'; + fclose(fp); + } + else + return false; + + // printf(shaderText); + // Load the string + gltLoadShaderSrc((const char *)shaderText, shader); + + return true; +} + + +///////////////////////////////////////////////////////////////// +// Load a pair of shaders, compile, and link together. Specify the complete +// file path for each shader. 
Note, there is no support for +// just loading say a vertex program... you have to do both. +GLuint gltLoadShaderPair(const char *szVertexProg, const char *szFragmentProg, bool loadFromFile) +{ + // Temporary Shader objects + GLuint hVertexShader; + GLuint hFragmentShader; + GLuint hReturn = 0; + GLint testVal; + + // Create shader objects + hVertexShader = glCreateShader(GL_VERTEX_SHADER); + hFragmentShader = glCreateShader(GL_FRAGMENT_SHADER); + + if (loadFromFile) + { + + if(gltLoadShaderFile(szVertexProg, hVertexShader) == false) + { + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + if(gltLoadShaderFile(szFragmentProg, hFragmentShader) == false) + { + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + } else + { + gltLoadShaderSrc(vertexShader, hVertexShader); + gltLoadShaderSrc(fragmentShader, hFragmentShader); + } + // Compile them + glCompileShader(hVertexShader); + glCompileShader(hFragmentShader); + + // Check for errors + glGetShaderiv(hVertexShader, GL_COMPILE_STATUS, &testVal); + if(testVal == GL_FALSE) + { + char temp[256] = ""; + glGetShaderInfoLog( hVertexShader, 256, NULL, temp); + fprintf( stderr, "Compile failed:\n%s\n", temp); + assert(0); + exit(0); + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + glGetShaderiv(hFragmentShader, GL_COMPILE_STATUS, &testVal); + if(testVal == GL_FALSE) + { + char temp[256] = ""; + glGetShaderInfoLog( hFragmentShader, 256, NULL, temp); + fprintf( stderr, "Compile failed:\n%s\n", temp); + assert(0); + exit(0); + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + return (GLuint)NULL; + } + + // Link them - assuming it works... 
+ hReturn = glCreateProgram(); + glAttachShader(hReturn, hVertexShader); + glAttachShader(hReturn, hFragmentShader); + + glLinkProgram(hReturn); + + // These are no longer needed + glDeleteShader(hVertexShader); + glDeleteShader(hFragmentShader); + + // Make sure link worked too + glGetProgramiv(hReturn, GL_LINK_STATUS, &testVal); + if(testVal == GL_FALSE) + { + glDeleteProgram(hReturn); + return (GLuint)NULL; + } + + return hReturn; +} + +///position xyz, unused w, normal, uv +static const GLfloat cube_vertices[] = +{ + -1.0f, -1.0f, 1.0f, 0.0f, 0,0,1, 0,0,//0 + 1.0f, -1.0f, 1.0f, 0.0f, 0,0,1, 1,0,//1 + 1.0f, 1.0f, 1.0f, 0.0f, 0,0,1, 1,1,//2 + -1.0f, 1.0f, 1.0f, 0.0f, 0,0,1, 0,1 ,//3 + + -1.0f, -1.0f, -1.0f, 1.0f, 0,0,-1, 0,0,//4 + 1.0f, -1.0f, -1.0f, 1.0f, 0,0,-1, 1,0,//5 + 1.0f, 1.0f, -1.0f, 1.0f, 0,0,-1, 1,1,//6 + -1.0f, 1.0f, -1.0f, 1.0f, 0,0,-1, 0,1,//7 + + -1.0f, -1.0f, -1.0f, 1.0f, -1,0,0, 0,0, + -1.0f, 1.0f, -1.0f, 1.0f, -1,0,0, 1,0, + -1.0f, 1.0f, 1.0f, 1.0f, -1,0,0, 1,1, + -1.0f, -1.0f, 1.0f, 1.0f, -1,0,0, 0,1, + + 1.0f, -1.0f, -1.0f, 1.0f, 1,0,0, 0,0, + 1.0f, 1.0f, -1.0f, 1.0f, 1,0,0, 1,0, + 1.0f, 1.0f, 1.0f, 1.0f, 1,0,0, 1,1, + 1.0f, -1.0f, 1.0f, 1.0f, 1,0,0, 0,1, + + -1.0f, -1.0f, -1.0f, 1.0f, 0,-1,0, 0,0, + -1.0f, -1.0f, 1.0f, 1.0f, 0,-1,0, 1,0, + 1.0f, -1.0f, 1.0f, 1.0f, 0,-1,0, 1,1, + 1.0f,-1.0f, -1.0f, 1.0f, 0,-1,0, 0,1, + + -1.0f, 1.0f, -1.0f, 1.0f, 0,1,0, 0,0, + -1.0f, 1.0f, 1.0f, 1.0f, 0,1,0, 1,0, + 1.0f, 1.0f, 1.0f, 1.0f, 0,1,0, 1,1, + 1.0f,1.0f, -1.0f, 1.0f, 0,1,0, 0,1, +}; + +static const int cube_indices[]= +{ + 0,1,2,0,2,3,//ground face + 4,5,6,4,6,7,//top face + 8,9,10,8,10,11, + 12,13,14,12,14,15, + 16,17,18,16,18,19, + 20,21,22,20,22,23 +}; + + + + + +void DeleteCL() +{ + clReleaseContext(g_cxMainContext); + clReleaseCommandQueue(g_cqCommandQue); +} + +void InitCL() +{ + void* glCtx=0; + void* glDC = 0; + +#ifdef _WIN32 + glCtx = wglGetCurrentContext(); +#else //!_WIN32 + GLXContext glCtx = glXGetCurrentContext(); +#endif //!_WIN32 + 
glDC = wglGetCurrentDC(); + + int ciErrNum = 0; + cl_device_type deviceType = CL_DEVICE_TYPE_ALL;//CPU; + g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext); + + if (numDev>0) + { + g_device= btOpenCLUtils::getDevice(g_cxMainContext,0); + btOpenCLDeviceInfo clInfo; + btOpenCLUtils::getDeviceInfo(g_device,clInfo); + btOpenCLUtils::printDeviceInfo(g_device); + // create a command-queue + g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + //normally you would create and execute kernels using this command queue + + } + + +} + +#define NUM_OBJECTS (NUM_OBJECTS_X*NUM_OBJECTS_Y*NUM_OBJECTS_Z) +#define POSITION_BUFFER_SIZE (NUM_OBJECTS*sizeof(float)*4) +#define ORIENTATION_BUFFER_SIZE (NUM_OBJECTS*sizeof(float)*4) + + +GLfloat* instance_positions_ptr = 0; +GLfloat* instance_quaternion_ptr = 0; + +void DeleteShaders() +{ + glDeleteVertexArrays(1, &cube_vao); + glDeleteBuffers(1,&index_vbo); + glDeleteBuffers(1,&cube_vbo); + glDeleteProgram(instancingShader); +} + +void writeTransforms() +{ + + + glFlush(); + char* bla = (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY + + float* positions = (float*)(bla+sizeof(cube_vertices)); + float* orientations = (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE); + // positions[0]+=0.001f; + + static int offset=0; + //offset++; + + static btVector3 axis(1,0,0); + angle += 0.01f; + int index=0; + btQuaternion orn(axis,angle); + for (int i=0;i m_glutScreenHeight) + { + aspect = m_glutScreenWidth / (float)m_glutScreenHeight; + extents.setValue(aspect * 1.0f, 1.0f,0); + } else + { + aspect = m_glutScreenHeight / (float)m_glutScreenWidth; + extents.setValue(1.0f, aspect*1.f,0); + } + + + if (m_ortho) + { + // reset matrix + glLoadIdentity(); + extents *= m_cameraDistance; + btVector3 lower = 
m_cameraTargetPosition - extents; + btVector3 upper = m_cameraTargetPosition + extents; + glOrtho(lower.getX(), upper.getX(), lower.getY(), upper.getY(),-1000,1000); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + } else + { + if (m_glutScreenWidth > m_glutScreenHeight) + { + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + } else + { + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); + } + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + gluLookAt(m_cameraPosition[0], m_cameraPosition[1], m_cameraPosition[2], + m_cameraTargetPosition[0], m_cameraTargetPosition[1], m_cameraTargetPosition[2], + m_cameraUp.getX(),m_cameraUp.getY(),m_cameraUp.getZ()); + } + +} + + + +void myinit() +{ + + + // GLfloat light_ambient[] = { btScalar(0.2), btScalar(0.2), btScalar(0.2), btScalar(1.0) }; + GLfloat light_ambient[] = { btScalar(1.0), btScalar(1.2), btScalar(0.2), btScalar(1.0) }; + + GLfloat light_diffuse[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0) }; + GLfloat light_specular[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0 )}; + /* light_position is NOT default value */ + GLfloat light_position0[] = { btScalar(1000.0), btScalar(1000.0), btScalar(1000.0), btScalar(0.0 )}; + GLfloat light_position1[] = { btScalar(-1.0), btScalar(-10.0), btScalar(-1.0), btScalar(0.0) }; + + glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT0, GL_POSITION, light_position0); + + glLightfv(GL_LIGHT1, GL_AMBIENT, light_ambient); + glLightfv(GL_LIGHT1, GL_DIFFUSE, light_diffuse); + glLightfv(GL_LIGHT1, GL_SPECULAR, light_specular); + glLightfv(GL_LIGHT1, GL_POSITION, light_position1); + + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + glEnable(GL_LIGHT1); + + + // 
glShadeModel(GL_FLAT);//GL_SMOOTH); + glShadeModel(GL_SMOOTH); + + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + + glClearColor(float(0.7),float(0.7),float(0.7),float(0)); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + + + static bool m_textureenabled = true; + static bool m_textureinitialized = false; + + + if(m_textureenabled) + { + if(!m_textureinitialized) + { + glActiveTexture(GL_TEXTURE0); + + GLubyte* image=new GLubyte[256*256*3]; + for(int y=0;y<256;++y) + { + const int t=y>>5; + GLubyte* pi=image+y*256*3; + for(int x=0;x<256;++x) + { + const int s=x>>5; + const GLubyte b=180; + GLubyte c=b+((s+t&1)&1)*(255-b); + pi[0]=255; + pi[1]=c; + pi[2]=c; + pi+=3; + } + } + + glGenTextures(1,(GLuint*)&m_texturehandle); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + glTexEnvf(GL_TEXTURE_ENV,GL_TEXTURE_ENV_MODE,GL_MODULATE); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT); + gluBuild2DMipmaps(GL_TEXTURE_2D,3,256,256,GL_RGB,GL_UNSIGNED_BYTE,image); + delete[] image; + m_textureinitialized=true; + } + // glMatrixMode(GL_TEXTURE); + // glLoadIdentity(); + // glMatrixMode(GL_MODELVIEW); + + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D,m_texturehandle); + + } else + { + glDisable(GL_TEXTURE_2D); + } + + glEnable(GL_COLOR_MATERIAL); + + + // glEnable(GL_CULL_FACE); + // glCullFace(GL_BACK); +} + +//#pragma optimize( "g", off ) + +void updatePos() +{ + + + if (useCPU) + { + int index=0; + for (int i=0;igetCLBUffer(); + cl_int ciErrNum = CL_SUCCESS; + ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + if (runOpenCLKernels) + { + int numObjects = NUM_OBJECTS; + int offset = (sizeof(cube_vertices) )/4; + + ciErrNum = clSetKernelArg(g_interopKernel, 0, 
sizeof(int), &offset); + ciErrNum = clSetKernelArg(g_interopKernel, 1, sizeof(int), &numObjects); + ciErrNum = clSetKernelArg(g_interopKernel, 2, sizeof(cl_mem), (void*)&clBuffer ); + size_t numWorkItems = workGroupSize*((NUM_OBJECTS + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_interopKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + clFinish(g_cqCommandQue); + + } + +} +//#pragma optimize( "g", on ) + +void RenderScene(void) +{ + +#if 0 + float modelview[20]={0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9}; + // get the current modelview matrix + glGetFloatv(GL_MODELVIEW_MATRIX , modelview); + float projection[20]={0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9}; + glGetFloatv(GL_PROJECTION_MATRIX, projection); +#endif + + myinit(); + + updateCamera(); + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + //render coordinate system + glBegin(GL_LINES); + glColor3f(1,0,0); + glVertex3f(0,0,0); + glVertex3f(1,0,0); + glColor3f(0,1,0); + glVertex3f(0,0,0); + glVertex3f(0,1,0); + glColor3f(0,0,1); + glVertex3f(0,0,0); + glVertex3f(0,0,1); + glEnd(); + + //do a finish, to make sure timings are clean + // glFinish(); + + float start = gStopwatch.getTimeMilliseconds(); + + // glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); + glFlush(); + updatePos(); + + float stop = gStopwatch.getTimeMilliseconds(); + gStopwatch.reset(); + + if (printStats) + { + printf("updatePos=%f ms on ",stop-start); + + if (useCPU) + { + printf("CPU \n"); + } else + { + printf("OpenCL "); + if (runOpenCLKernels) + printf("running the kernels"); + else + printf("without running the kernels"); + printf("\n"); + } + } + + glBindVertexArray(cube_vao); + + glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 9*sizeof(float), 0); + glVertexAttribPointer(1, 4, GL_FLOAT, 
GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices))); + glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(sizeof(cube_vertices)+POSITION_BUFFER_SIZE)); + int uvoffset = 7*sizeof(float); + int normaloffset = 4*sizeof(float); + + glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)uvoffset); + glVertexAttribPointer(4, 3, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)normaloffset); + + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glEnableVertexAttribArray(4); + + glVertexAttribDivisor(1, 1); + glVertexAttribDivisor(2, 1); + glVertexAttribDivisor(3, 0); + glVertexAttribDivisor(4, 0); + + glUseProgram(instancingShader); + glUniform1f(angle_loc, 0); + GLfloat pm[16]; + glGetFloatv(GL_PROJECTION_MATRIX, pm); + glUniformMatrix4fv(ProjectionMatrix, 1, false, &pm[0]); + + GLfloat mvm[16]; + glGetFloatv(GL_MODELVIEW_MATRIX, mvm); + glUniformMatrix4fv(ModelViewMatrix, 1, false, &mvm[0]); + + glUniform1i(uniform_texture_diffuse, 0); + + glFlush(); + int numInstances = NUM_OBJECTS; + int indexCount = sizeof(cube_indices)/sizeof(int); + int indexOffset = 0; + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_vbo); + glDrawElementsInstanced(GL_TRIANGLES, indexCount, GL_UNSIGNED_INT, (void*)indexOffset, numInstances); + + glUseProgram(0); + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindVertexArray(0); + + glutSwapBuffers(); + glutPostRedisplay(); + + GLint err = glGetError(); + assert(err==GL_NO_ERROR); +} + + +void ChangeSize(int w, int h) +{ + m_glutScreenWidth = w; + m_glutScreenHeight = h; + +#ifdef RECREATE_CL_AND_SHADERS_ON_RESIZE + delete g_interopBuffer; + clReleaseKernel(g_interopKernel); + DeleteCL(); + DeleteShaders(); +#endif //RECREATE_CL_AND_SHADERS_ON_RESIZE + + // Set Viewport to window dimensions + glViewport(0, 0, w, h); + +#ifdef RECREATE_CL_AND_SHADERS_ON_RESIZE + InitCL(); + InitShaders(); + + g_interopBuffer = new 
btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo); + clFinish(g_cqCommandQue); + g_interopKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, interopKernelString, "interopKernel" ); +#endif //RECREATE_CL_AND_SHADERS_ON_RESIZE + +} + +void Keyboard(unsigned char key, int x, int y) +{ + switch (key) + { + case 27: + done = true; + break; + case 'O': + case 'o': + { + m_ortho = !m_ortho; + break; + } + case 'c': + case 'C': + { + useCPU = !useCPU; + if (useCPU) + printf("using CPU\n"); + else + printf("using OpenCL\n"); + break; + } + case 's': + case 'S': + { + printStats = !printStats; + break; + } + case 'k': + case 'K': + { + runOpenCLKernels=!runOpenCLKernels; + break; + } + case 'q': + case 'Q': + exit(0); + default: + break; + } +} + +// Cleanup +void ShutdownRC(void) +{ + glDeleteBuffers(1, &cube_vbo); + glDeleteVertexArrays(1, &cube_vao); +} + +int main(int argc, char* argv[]) +{ + // printf("vertexShader = \n%s\n",vertexShader); + // printf("fragmentShader = \n%s\n",fragmentShader); + + glutInit(&argc, argv); + + glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); + + + glutInitWindowSize(m_glutScreenWidth, m_glutScreenHeight); + char buf[1024]; + sprintf(buf,"OpenCL - OpenGL interop, transforms %d cubes on the GPU (use c to toggle CPU/CL)", NUM_OBJECTS); + glutCreateWindow(buf); + + glutReshapeFunc(ChangeSize); + + glutKeyboardFunc(Keyboard); + glutDisplayFunc(RenderScene); + + GLenum err = glewInit(); + if (GLEW_OK != err) + { + /* Problem: glewInit failed, something is seriously wrong. 
*/ + fprintf(stderr, "Error: %s\n", glewGetErrorString(err)); + } + + //ChangeSize(m_glutScreenWidth,m_glutScreenHeight); + + InitCL(); + + + InitShaders(); + + g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo); + clFinish(g_cqCommandQue); + + + g_interopKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,interopKernelString, "interopKernel" ); + + glutMainLoop(); + ShutdownRC(); + + return 0; +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/premake4.lua new file mode 100644 index 000000000..422952b33 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/opengl_interop/premake4.lua @@ -0,0 +1,5 @@ + + include "AMD" + include "Intel" +-- include "NVIDIA" + \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.cpp b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.cpp new file mode 100644 index 000000000..fdd5ed3e9 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.cpp @@ -0,0 +1,19 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#include + +//KernelManager* KernelManager::s_kManager = NULL; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.h new file mode 100644 index 000000000..5d51abe4e --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.h @@ -0,0 +1,235 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef ADL_H +#define ADL_H + +#pragma warning( disable : 4996 ) +#include +#include +#include + +#ifndef max +#define max(a,b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef min +#define min(a,b) (((a) < (b)) ? 
(a) : (b)) +#endif + +namespace adl +{ + +enum DeviceType +{ + TYPE_CL = 0, + TYPE_DX11 = 1, + TYPE_HOST, +}; + + +struct Device; + +struct BufferBase +{ + enum BufferType + { + BUFFER, + + // for dx + BUFFER_CONST, + BUFFER_STAGING, + BUFFER_APPEND, + BUFFER_RAW, + BUFFER_W_COUNTER, + BUFFER_INDEX, + BUFFER_VERTEX, + + // for cl + BUFFER_ZERO_COPY, + + }; +}; + +class DeviceUtils +{ + public: + struct Config + { + enum DeviceType + { + DEVICE_GPU, + DEVICE_CPU, + }; + + // for CL + enum DeviceVendor + { + VD_AMD, + VD_INTEL, + VD_NV, + }; + + Config() : m_type(DEVICE_GPU), m_deviceIdx(0), m_vendor(VD_AMD){} + + DeviceType m_type; + int m_deviceIdx; + DeviceVendor m_vendor; + }; + + __inline + static + int getNDevices( DeviceType type ); + __inline + static Device* allocate( DeviceType type, Config& cfg ); + __inline + static void deallocate( Device* deviceData ); + __inline + static void waitForCompletion( const Device* deviceData ); +}; + +//========================== +// DeviceData +//========================== +struct Kernel; + +struct Device +{ + typedef DeviceUtils::Config Config; + + Device( DeviceType type ) : m_type( type ), m_memoryUsage(0) + { + } + + virtual void* getContext() const { return 0; } + virtual void initialize(const Config& cfg){} + virtual void release(){} + virtual void waitForCompletion() const {} + virtual void getDeviceName( char nameOut[128] ) const {} + virtual Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true ) const { ADLASSERT(0); return 0;} + virtual unsigned int getUsedMemory() const { return m_memoryUsage; } + + DeviceType m_type; + unsigned int m_memoryUsage; +}; + +//========================== +// Buffer +//========================== + +template +struct HostBuffer; +// overload each deviceDatas +template +struct Buffer : public BufferBase +{ + __inline + Buffer(); + __inline + Buffer(const Device* device, int nElems, BufferType type = BUFFER 
); + __inline + virtual ~Buffer(); + + __inline + void setRawPtr( const Device* device, T* ptr, int size, BufferType type = BUFFER ); + __inline + void allocate(const Device* device, int nElems, BufferType type = BUFFER ); + __inline + void write(T* hostSrcPtr, int nElems, int dstOffsetNElems = 0); + __inline + void read(T* hostDstPtr, int nElems, int srcOffsetNElems = 0) const; + __inline + void write(Buffer& src, int nElems); + __inline + void read(Buffer& dst, int nElems) const; +// __inline +// Buffer& operator = (const Buffer& buffer); + __inline + int getSize() const { return m_size; } + + DeviceType getType() const { ADLASSERT( m_device ); return m_device->m_type; } + + + const Device* m_device; + int m_size; + T* m_ptr; + // for DX11 + void* m_uav; + void* m_srv; + bool m_allocated; // todo. move this to a bit +}; + +class BufferUtils +{ +public: + template + __inline + static + typename Buffer* map(const Device* device, const Buffer* in, int copySize = -1); + + template + __inline + static + void unmap( Buffer* native, const Buffer* orig, int copySize = -1 ); +}; + +//========================== +// HostBuffer +//========================== +struct DeviceHost; + +template +struct HostBuffer : public Buffer +{ + __inline + HostBuffer():Buffer(){} + __inline + HostBuffer(const Device* device, int nElems, BufferType type = BUFFER ) : Buffer(device, nElems, type) {} +// HostBuffer(const Device* deviceData, T* rawPtr, int nElems); + + + __inline + T& operator[](int idx); + __inline + const T& operator[](int idx) const; + __inline + T* begin() { return m_ptr; } + + __inline + HostBuffer& operator = (const Buffer& device); +}; + +}; + +#include +#if defined(ADL_ENABLE_CL) + #include +#endif +#if defined(ADL_ENABLE_DX11) + #include +#endif + +#include +#include +#include + + +#include + +#include +#include + +#endif diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.inl new file mode 100644 
index 000000000..d732e86d6 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.inl @@ -0,0 +1,344 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +namespace adl +{ + +int DeviceUtils::getNDevices( DeviceType type ) +{ + switch( type ) + { +#if defined(ADL_ENABLE_CL) + case TYPE_CL: + return DeviceCL::getNDevices(); +#endif +#if defined(ADL_ENABLE_DX11) + case TYPE_DX11: + return DeviceDX11::getNDevices(); +#endif + default: + return 1; + }; +} + +Device* DeviceUtils::allocate( DeviceType type, Config& cfg ) +{ + Device* deviceData; + switch( type ) + { +#if defined(ADL_ENABLE_CL) + case TYPE_CL: + deviceData = new DeviceCL(); + break; +#endif +#if defined(ADL_ENABLE_DX11) + case TYPE_DX11: + deviceData = new DeviceDX11(); + break; +#endif + case TYPE_HOST: + deviceData = new DeviceHost(); + break; + default: + ADLASSERT( 0 ); + break; + }; + deviceData->initialize( cfg ); + return deviceData; +} + +void DeviceUtils::deallocate( Device* deviceData ) +{ + ADLASSERT( deviceData->getUsedMemory() == 0 ); + deviceData->release(); + delete deviceData; +} + +void DeviceUtils::waitForCompletion( 
const Device* deviceData ) +{ + deviceData->waitForCompletion(); +} + +#if defined(ADL_ENABLE_DX11) + #if defined(ADL_ENABLE_CL) + #define SELECT_DEVICEDATA( type, func ) \ + switch( type ) \ + { \ + case TYPE_CL: ((DeviceCL*)m_device)->func; break; \ + case TYPE_DX11: ((DeviceDX11*)m_device)->func; break; \ + case TYPE_HOST: ((DeviceHost*)m_device)->func; break; \ + default: ADLASSERT(0); break; \ + } + + #define SELECT_DEVICEDATA1( deviceData, func ) \ + switch( deviceData->m_type ) \ + { \ + case TYPE_CL: ((DeviceCL*)deviceData)->func; break; \ + case TYPE_DX11: ((DeviceDX11*)deviceData)->func; break; \ + case TYPE_HOST: ((DeviceHost*)deviceData)->func; break; \ + default: ADLASSERT(0); break; \ + } + #else + #define SELECT_DEVICEDATA( type, func ) \ + switch( type ) \ + { \ + case TYPE_DX11: ((DeviceDX11*)m_device)->func; break; \ + case TYPE_HOST: ((DeviceHost*)m_device)->func; break; \ + default: ADLASSERT(0); break; \ + } + + #define SELECT_DEVICEDATA1( deviceData, func ) \ + switch( deviceData->m_type ) \ + { \ + case TYPE_DX11: ((DeviceDX11*)deviceData)->func; break; \ + case TYPE_HOST: ((DeviceHost*)deviceData)->func; break; \ + default: ADLASSERT(0); break; \ + } + #endif +#else + #if defined(ADL_ENABLE_CL) + #define SELECT_DEVICEDATA( type, func ) \ + switch( type ) \ + { \ + case TYPE_CL: ((DeviceCL*)m_device)->func; break; \ + case TYPE_HOST: ((DeviceHost*)m_device)->func; break; \ + default: ADLASSERT(0); break; \ + } + + #define SELECT_DEVICEDATA1( deviceData, func ) \ + switch( deviceData->m_type ) \ + { \ + case TYPE_CL: ((DeviceCL*)deviceData)->func; break; \ + case TYPE_HOST: ((DeviceHost*)deviceData)->func; break; \ + default: ADLASSERT(0); break; \ + } + #else + #define SELECT_DEVICEDATA( type, func ) \ + switch( type ) \ + { \ + case TYPE_HOST: ((DeviceHost*)m_device)->func; break; \ + default: ADLASSERT(0); break; \ + } + + #define SELECT_DEVICEDATA1( deviceData, func ) \ + switch( deviceData->m_type ) \ + { \ + case TYPE_HOST: 
((DeviceHost*)deviceData)->func; break; \ + default: ADLASSERT(0); break; \ + } + #endif +#endif + +template +Buffer::Buffer() +{ + m_device = 0; + m_size = 0; + m_ptr = 0; + + m_uav = 0; + m_srv = 0; + + m_allocated = false; +} + +template +Buffer::Buffer(const Device* deviceData, int nElems, BufferType type ) +{ + m_device = 0; + allocate( deviceData, nElems, type ); +} + +template +Buffer::~Buffer() +{ + if( m_allocated ) + { + if( m_device ) + SELECT_DEVICEDATA( m_device->m_type, deallocate( this ) ); + } + + m_device = 0; + m_ptr = 0; + m_size = 0; +} + +template +void Buffer::setRawPtr( const Device* device, T* ptr, int size, BufferType type ) +{ + ADLASSERT( m_device == 0 ); + ADLASSERT( type == BUFFER ); // todo. implement + ADLASSERT( device->m_type != TYPE_DX11 ); // todo. implement set srv, uav + + m_device = device; + m_ptr = ptr; + m_size = size; +} + +template +void Buffer::allocate(const Device* deviceData, int nElems, BufferType type ) +{ + ADLASSERT( m_device == 0 ); + m_device = deviceData; + m_size = 0; + m_ptr = 0; + + m_uav = 0; + m_srv = 0; + + SELECT_DEVICEDATA( m_device->m_type, allocate( this, nElems, type ) ); + m_allocated = true; +} + +template +void Buffer::write(T* hostPtr, int nElems, int offsetNElems) +{ + ADLASSERT( nElems+offsetNElems <= m_size ); + SELECT_DEVICEDATA( m_device->m_type, copy(this, hostPtr, nElems, offsetNElems) ); +} + +template +void Buffer::read(T* hostPtr, int nElems, int offsetNElems) const +{ + SELECT_DEVICEDATA( m_device->m_type, copy(hostPtr,this, nElems, offsetNElems) ); +} + +template +void Buffer::write(Buffer& src, int nElems) +{ + ADLASSERT( nElems <= m_size ); + SELECT_DEVICEDATA( m_device->m_type, copy(this, &src, nElems) ); +} + +template +void Buffer::read(Buffer& dst, int nElems) const +{ + SELECT_DEVICEDATA( m_device->m_type, copy(&dst, this, nElems) ); +} +/* +template +Buffer& Buffer::operator = ( const Buffer& buffer ) +{ +// ADLASSERT( buffer.m_size <= m_size ); + + SELECT_DEVICEDATA( 
m_device->m_type, copy(this, &buffer, min2( m_size, buffer.m_size) ) ); + + return *this; +} +*/ + +template +__inline +static +typename Buffer* BufferUtils::map(const Device* device, const Buffer* in, int copySize) +{ + Buffer* native; + ADLASSERT( device->m_type == TYPE ); + + if( in->getType() == TYPE ) + native = (Buffer*)in; + else + { + ADLASSERT( copySize <= in->getSize() ); + copySize = (copySize==-1)? in->getSize() : copySize; + + native = new Buffer( device, copySize ); + if( COPY ) + { + if( in->getType() == TYPE_HOST ) + native->write( in->m_ptr, copySize ); + else if( native->getType() == TYPE_HOST ) + { + in->read( native->m_ptr, copySize ); + DeviceUtils::waitForCompletion( in->m_device ); + } + else + { + T* tmp = new T[copySize]; + in->read( tmp, copySize ); + DeviceUtils::waitForCompletion( in->m_device ); + native->write( tmp, copySize ); + DeviceUtils::waitForCompletion( native->m_device ); + delete [] tmp; + } + } + } + return native; +} + +template +__inline +static +void BufferUtils::unmap( Buffer* native, const Buffer* orig, int copySize ) +{ + if( native != orig ) + { + if( COPY ) + { + copySize = (copySize==-1)? 
orig->getSize() : copySize; + ADLASSERT( copySize <= orig->getSize() ); + if( orig->getType() == TYPE_HOST ) + { + native->read( orig->m_ptr, copySize ); + DeviceUtils::waitForCompletion( native->m_device ); + } + else if( native->getType() == TYPE_HOST ) + { + Buffer* dst = (Buffer*)orig; + dst->write( native->m_ptr, copySize ); + DeviceUtils::waitForCompletion( dst->m_device ); + } + else + { + T* tmp = new T[copySize]; + native->read( tmp, copySize ); + DeviceUtils::waitForCompletion( native->m_device ); + Buffer* dst = (Buffer*)orig; + dst->write( tmp, copySize ); + DeviceUtils::waitForCompletion( dst->m_device ); + delete [] tmp; + } + } + delete native; + } +} + + +template +T& HostBuffer::operator[](int idx) +{ + return m_ptr[idx]; +} + +template +const T& HostBuffer::operator[](int idx) const +{ + return m_ptr[idx]; +} + +template +HostBuffer& HostBuffer::operator = ( const Buffer& device ) +{ + ADLASSERT( device.m_size <= m_size ); + + SELECT_DEVICEDATA1( device.m_device, copy( m_ptr, &device, device.m_size ) ); + + return *this; +} + +#undef SELECT_DEVICEDATA + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlConfig.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlConfig.h new file mode 100644 index 000000000..141c874c9 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlConfig.h @@ -0,0 +1,27 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + + +//ADL_ENABLE_CL and ADL_ENABLE_DX11 can be set in the build system using C/C++ preprocessor defines +//#define ADL_ENABLE_CL +//#define ADL_ENABLE_DX11 + +//#define ADL_CL_FORCE_UNCACHE_KERNEL +#define ADL_CL_DUMP_MEMORY_LOG + +//load the kernels from string instead of loading them from file +#define ADL_LOAD_KERNEL_FROM_STRING +#define ADL_DUMP_DX11_ERROR diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlError.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlError.h new file mode 100644 index 000000000..6d08e95a8 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlError.h @@ -0,0 +1,80 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#ifndef ADL_ERROR_H +#define ADL_ERROR_H + +#if defined(ADL_DUMP_DX11_ERROR) + #include +#endif +#ifdef _DEBUG + #include + #include + #include +#endif + + +namespace adl +{ + +#ifdef _DEBUG + #define ADLASSERT(x) if(!(x)){__debugbreak(); } +#else + #define ADLASSERT(x) if(x){} +#endif + +#ifdef _DEBUG + #define COMPILE_TIME_ASSERT(x) {int compileTimeAssertFailed[x]; compileTimeAssertFailed[0];} +#else + #define COMPILE_TIME_ASSERT(x) +#endif + +#ifdef _DEBUG + __inline + void debugPrintf(const char *fmt, ...) + { + va_list arg; + va_start(arg, fmt); +#if defined(ADL_DUMP_DX11_ERROR) + const int size = 1024*10; + char buf[size]; + vsprintf_s( buf, size, fmt, arg ); +#ifdef UNICODE + WCHAR wbuf[size]; + int sizeWide = MultiByteToWideChar(0,0,buf,-1,wbuf,0); + MultiByteToWideChar(0,0,buf,-1,wbuf,sizeWide); + +// swprintf_s( wbuf, 256, L"%s", buf ); + OutputDebugString( wbuf ); +#else + OutputDebugString( buf ); +#endif +#else + vprintf(fmt, arg); +#endif + va_end(arg); + } +#else + __inline + void debugPrintf(const char *fmt, ...) + { + } +#endif + +}; + +#endif + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.h new file mode 100644 index 000000000..1a785c1be --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.h @@ -0,0 +1,142 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef ADL_KERNEL_H +#define ADL_KERNEL_H + +#include +#include +#include + +namespace adl +{ + +//========================== +// Kernel +//========================== +struct Kernel +{ + DeviceType m_type; + void* m_kernel; +}; + +//========================== +// KernelManager +//========================== +class KernelManager +{ + public: + typedef std::map KMap; + + __inline + ~KernelManager(); + + __inline +// static + Kernel* query(const Device* dd, const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, + bool cacheKernel = true); + + public: + KMap m_map; +}; + +//========================== +// Launcher +//========================== +class Launcher +{ + public: + struct BufferInfo + { + BufferInfo(){} + template + BufferInfo(Buffer* buff, bool isReadOnly = false): m_buffer(buff), m_isReadOnly(isReadOnly){} + + void* m_buffer; + bool m_isReadOnly; + }; + + __inline + Launcher(const Device* dd, char* fileName, char* funcName, char* option = NULL); + __inline + Launcher(const Device* dd, Kernel* kernel); + __inline + void setBuffers( BufferInfo* buffInfo, int n ); + template + __inline + void setConst( Buffer& constBuff, const T& consts ); + __inline + void launch1D( int numThreads, int localSize = 64 ); + __inline + void launch2D( int numThreadsX, int numThreadsY, int localSizeX = 8, int localSizeY = 8 ); + + public: + enum + { + CONST_BUFFER_SIZE = 512, + }; + + const Device* m_deviceData; + Kernel* m_kernel; + int m_idx; + int m_idxRw; +}; + +template +class KernelBuilder +{ + public: + + __inline + KernelBuilder(): m_ptr(0){} + + __inline + void 
setFromFile( const Device* deviceData, const char* fileName, const char* option = NULL, bool addExtension = false, + bool cacheKernel = true); + + __inline + void setFromSrc( const Device* deviceData, const char* src, const char* option = NULL ); + + __inline + void setFromSrcCached( const Device* deviceData, const char* src, const char* fileName, const char* option ); + + + __inline + void createKernel( const char* funcName, Kernel& kernelOut ); + + __inline + ~KernelBuilder(); + // todo. implemement in kernel destructor? + __inline + static void deleteKernel( Kernel& kernel ); + + private: + enum + { + MAX_PATH_LENGTH = 260, + }; + const Device* m_deviceData; +#ifdef UNICODE + wchar_t m_path[MAX_PATH_LENGTH]; +#else + char m_path[MAX_PATH_LENGTH]; +#endif + void* m_ptr; +}; + +}; + +#endif //ADL_KERNEL_H diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.inl new file mode 100644 index 000000000..9752b8cf6 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.inl @@ -0,0 +1,223 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + + +#ifdef ADL_ENABLE_CL + #include +#endif +#ifdef ADL_ENABLE_DX11 + #include +#endif + +namespace adl +{ + +//========================== +// KernelManager +//========================== +Kernel* KernelManager::query(const Device* dd, const char* fileName, const char* funcName, const char* option, const char* src, + bool cacheKernel) +{ + printf("compiling kernel %s",funcName); + const int charSize = 1024*2; + KernelManager* s_kManager = this; + + char fullFineName[charSize]; + switch( dd->m_type ) + { + case TYPE_CL: +#if defined(ADL_ENABLE_CL) + sprintf_s(fullFineName,charSize,"%s.cl", fileName); + break; +#endif +#if defined(ADL_ENABLE_DX11) + case TYPE_DX11: + sprintf_s(fullFineName,charSize,"%s.hlsl", fileName); + break; +#endif + default: + ADLASSERT(0); + break; + }; + + char mapName[charSize]; + { + if( option ) + sprintf_s(mapName, charSize, "%d%s%s%s", (int)dd->getContext(), fullFineName, funcName, option); + else + sprintf_s(mapName, charSize, "%d%s%s", (int)dd->getContext(), fullFineName, funcName); + } + + std::string str(mapName); + + KMap::iterator iter = s_kManager->m_map.find( str ); + + Kernel* kernelOut; + if( iter == s_kManager->m_map.end() ) + { + kernelOut = new Kernel(); + + switch( dd->m_type ) + { +#if defined(ADL_ENABLE_CL) + case TYPE_CL: + { + KernelBuilder builder; + if( src ) + if (cacheKernel) + { + builder.setFromSrcCached( dd, src, fileName, option ); + } else + { + builder.setFromSrc( dd, src, option ); + } + else + builder.setFromFile( dd, fileName, option, true, cacheKernel ); + builder.createKernel( funcName, *kernelOut ); + } + break; +#endif +#if defined(ADL_ENABLE_DX11) + case TYPE_DX11: + { + KernelBuilder builder; + if( src ) + builder.setFromSrc( dd, src, option ); + else + builder.setFromFile( dd, fileName, option, true, cacheKernel ); + builder.createKernel( funcName, *kernelOut ); + } + break; +#endif + default: + ADLASSERT(0); + break; + }; + s_kManager->m_map.insert( 
KMap::value_type(str,kernelOut) ); + } + else + { + kernelOut = iter->second; + } + + printf(" ready\n"); + return kernelOut; +} + +KernelManager::~KernelManager() +{ + for(KMap::iterator iter = m_map.begin(); iter != m_map.end(); iter++) + { + Kernel* k = iter->second; + switch( k->m_type ) + { +#if defined(ADL_ENABLE_CL) + case TYPE_CL: + KernelBuilder::deleteKernel( *k ); + delete k; + break; +#endif +#if defined(ADL_ENABLE_DX11) + case TYPE_DX11: + KernelBuilder::deleteKernel( *k ); + delete k; + break; +#endif + default: + ADLASSERT(0); + break; + }; + } +} + +//========================== +// Launcher +//========================== + +#if defined(ADL_ENABLE_DX11) + #if defined(ADL_ENABLE_CL) + #define SELECT_LAUNCHER( type, func ) \ + switch( type ) \ + { \ + case TYPE_CL: LauncherCL::func; break; \ + case TYPE_DX11: LauncherDX11::func; break; \ + default: ADLASSERT(0); break; \ + }; + #else + #define SELECT_LAUNCHER( type, func ) \ + switch( type ) \ + { \ + case TYPE_DX11: LauncherDX11::func; break; \ + default: ADLASSERT(0); break; \ + }; + #endif +#else + #if defined(ADL_ENABLE_CL) + #define SELECT_LAUNCHER( type, func ) \ + switch( type ) \ + { \ + case TYPE_CL: LauncherCL::func; break; \ + default: ADLASSERT(0); break; \ + }; + #else + #define SELECT_LAUNCHER( type, func ) \ + switch( type ) \ + { \ + default: ADLASSERT(0); break; \ + }; + #endif +#endif + +Launcher::Launcher(const Device *dd, char *fileName, char *funcName, char *option) +{ + m_kernel = dd->getKernel( fileName, funcName, option ); + m_deviceData = dd; + m_idx = 0; + m_idxRw = 0; +} + +Launcher::Launcher(const Device* dd, Kernel* kernel) +{ + m_kernel = kernel; + m_deviceData = dd; + m_idx = 0; + m_idxRw = 0; +} + +void Launcher::setBuffers( BufferInfo* buffInfo, int n ) +{ + SELECT_LAUNCHER( m_deviceData->m_type, setBuffers( this, buffInfo, n ) ); +} + +template +void Launcher::setConst( Buffer& constBuff, const T& consts ) +{ + SELECT_LAUNCHER( m_deviceData->m_type, setConst( this, 
constBuff, consts ) ); +} + +void Launcher::launch1D( int numThreads, int localSize ) +{ + SELECT_LAUNCHER( m_deviceData->m_type, launch2D( this, numThreads, 1, localSize, 1 ) ); +} + +void Launcher::launch2D( int numThreadsX, int numThreadsY, int localSizeX, int localSizeY ) +{ + SELECT_LAUNCHER( m_deviceData->m_type, launch2D( this, numThreadsX, numThreadsY, localSizeX, localSizeY ) ); +} + +#undef SELECT_LAUNCHER + +}; \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlStopwatch.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlStopwatch.h new file mode 100644 index 000000000..034f044a1 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlStopwatch.h @@ -0,0 +1,81 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + + +#include + +namespace adl +{ + +struct StopwatchBase +{ + __inline + StopwatchBase(): m_device(0){} + __inline + StopwatchBase( const Device* deviceData ){ init(deviceData); } + __inline + virtual ~StopwatchBase(){} + + __inline + virtual void init( const Device* deviceData ) = 0; + __inline + virtual void start() = 0; + __inline + virtual void split() = 0; + __inline + virtual void stop() = 0; + __inline + virtual float getMs(int index=0) = 0; + __inline + virtual void getMs( float* times, int capacity ) = 0; + __inline + int getNIntervals() const{ return m_idx-1;} + + enum + { + CAPACITY = 64, + }; + + const Device* m_device; + int m_idx; +}; + +struct Stopwatch +{ + __inline + Stopwatch( const Device* deviceData = NULL ) { m_impl=0; if(deviceData) init(deviceData);} + __inline + ~Stopwatch(); + + __inline + void init( const Device* deviceData ); + __inline + void start(){if(!m_impl) init(0); m_impl->start();} + __inline + void split(){m_impl->split();} + __inline + void stop(){m_impl->stop();} + __inline + float getMs(){ return m_impl->getMs();} + __inline + void getMs( float* times, int capacity ){m_impl->getMs(times, capacity);} + __inline + int getNIntervals() const{return m_impl->getNIntervals();} + + StopwatchBase* m_impl; +}; + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlStopwatch.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlStopwatch.inl new file mode 100644 index 000000000..a825ec2c9 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlStopwatch.inl @@ -0,0 +1,59 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +namespace adl +{ + +void Stopwatch::init( const Device* deviceData ) +{ + ADLASSERT( m_impl == 0 ); + + if( deviceData ) + { + switch( deviceData->m_type ) + { +#if defined(ADL_ENABLE_CL) + case TYPE_CL: + m_impl = new StopwatchHost;//StopwatchCL + break; +#endif +#if defined(ADL_ENABLE_DX11) + case TYPE_DX11: + m_impl = new StopwatchHost;//StopwatchDX11; + break; +#endif + case TYPE_HOST: + m_impl = new StopwatchHost; + break; + default: + ADLASSERT(0); + break; + }; + } + else + { + m_impl = new StopwatchHost; + } + m_impl->init( deviceData ); +} + +Stopwatch::~Stopwatch() +{ + if( m_impl == 0 ) return; + delete m_impl; +} + +}; \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlCL.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlCL.inl new file mode 100644 index 000000000..1b603a9c4 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlCL.inl @@ -0,0 +1,384 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + + +#pragma comment(lib,"OpenCL.lib") +#include +#include +#include + +namespace adl +{ + +struct DeviceCL : public Device +{ + typedef DeviceUtils::Config Config; + + + __inline + DeviceCL() : Device( TYPE_CL ), m_kernelManager(0){} + __inline + void* getContext() const { return m_context; } + __inline + void initialize(const Config& cfg); + __inline + void release(); + + template + __inline + void allocate(Buffer* buf, int nElems, BufferBase::BufferType type); + + template + __inline + void deallocate(Buffer* buf); + + template + __inline + void copy(Buffer* dst, const Buffer* src, int nElems,int srcOffsetNElems = 0,int dstOffsetNElems = 0); + + template + __inline + void copy(T* dst, const Buffer* src, int nElems, int srcOffsetNElems = 0); + + template + __inline + void copy(Buffer* dst, const T* src, int nElems, int dstOffsetNElems = 0); + + __inline + void waitForCompletion() const; + + __inline + void getDeviceName( char nameOut[128] ) const; + + __inline + static + int getNDevices(); + + __inline + Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true )const; + + + enum + { + MAX_NUM_DEVICES = 6, + }; + + cl_context m_context; + cl_command_queue m_commandQueue; + + cl_device_id m_deviceIdx; + + 
KernelManager* m_kernelManager; +}; + +//=== +//=== + +void DeviceCL::initialize(const Config& cfg) +{ +// DeviceUtils::create( cfg, (DeviceCL*)this ); + { +// dd = new DeviceCL(); + + DeviceCL* deviceData = (DeviceCL*)this; + +// cl_device_type deviceType = (driverType == DRIVER_HARDWARE)? CL_DEVICE_TYPE_GPU:CL_DEVICE_TYPE_CPU; + cl_device_type deviceType = (cfg.m_type== Config::DEVICE_GPU)? CL_DEVICE_TYPE_GPU: CL_DEVICE_TYPE_CPU; +// int numContextQueuePairsToCreate = 1; + bool enableProfiling = false; +#ifdef _DEBUG + enableProfiling = true; +#endif + cl_int status; + + cl_platform_id platform; + { + cl_uint nPlatforms = 0; + status = clGetPlatformIDs(0, NULL, &nPlatforms); + ADLASSERT( status == CL_SUCCESS ); + + cl_platform_id pIdx[5]; + status = clGetPlatformIDs(nPlatforms, pIdx, NULL); + ADLASSERT( status == CL_SUCCESS ); + + cl_uint atiIdx = -1; + cl_uint intelIdx = -1; + cl_uint nvIdx = -1; + + for(cl_uint i=0; i0) + { + if( strcmp( buff, "NVIDIA Corporation" )==0 ) nvIdx = i; + if( strcmp( buff, "Advanced Micro Devices, Inc." 
)==0 ) atiIdx = i; + if( strcmp( buff, "Intel(R) Corporation" )==0 ) intelIdx = i; + } + } + + if( deviceType == CL_DEVICE_TYPE_GPU ) + { + switch( cfg.m_vendor ) + { + case DeviceUtils::Config::VD_AMD: + if( atiIdx == -1 && nvIdx != -1 ) goto USE_NV_GPU; +USE_AMD_GPU: + ADLASSERT(atiIdx != -1 ); + platform = pIdx[atiIdx]; + break; + case DeviceUtils::Config::VD_NV: + if( atiIdx != -1 && nvIdx == -1 ) goto USE_AMD_GPU; +USE_NV_GPU: + ADLASSERT(nvIdx != -1 ); + platform = pIdx[nvIdx]; + break; + default: + ADLASSERT(0); + break; + }; + } + else if( deviceType == CL_DEVICE_TYPE_CPU ) + { + switch( cfg.m_vendor ) + { + case DeviceUtils::Config::VD_AMD: + ADLASSERT(atiIdx != -1 ); + platform = pIdx[atiIdx]; + break; + case DeviceUtils::Config::VD_INTEL: + ADLASSERT(intelIdx != -1 ); + platform = pIdx[intelIdx]; + break; + default: + ADLASSERT(0); + break; + }; + } + } + + cl_uint numDevice; + status = clGetDeviceIDs( platform, deviceType, 0, NULL, &numDevice ); + +// ADLASSERT( cfg.m_deviceIdx < (int)numDevice ); + + debugPrintf("CL: %d %s Devices ", numDevice, (deviceType==CL_DEVICE_TYPE_GPU)? "GPU":"CPU"); + +// numContextQueuePairsToCreate = min( (int)numDevice, numContextQueuePairsToCreate ); +// numContextQueuePairsToCreate = ( (int)numDevice < numContextQueuePairsToCreate )? 
numDevice : numContextQueuePairsToCreate; + + cl_device_id deviceIds[ MAX_NUM_DEVICES ]; + + status = clGetDeviceIDs( platform, deviceType, numDevice, deviceIds, NULL ); + ADLASSERT( status == CL_SUCCESS ); + + { int i = min( (int)numDevice-1, cfg.m_deviceIdx ); + m_deviceIdx = deviceIds[i]; + deviceData->m_context = clCreateContext( NULL, 1, &deviceData->m_deviceIdx, NULL, NULL, &status ); + ADLASSERT( status == CL_SUCCESS ); + + char buff[512]; + status = clGetDeviceInfo( deviceData->m_deviceIdx, CL_DEVICE_NAME, sizeof(buff), &buff, NULL ); + ADLASSERT( status == CL_SUCCESS ); + + debugPrintf("[%s]\n", buff); + + deviceData->m_commandQueue = clCreateCommandQueue( deviceData->m_context, deviceData->m_deviceIdx, (enableProfiling)?CL_QUEUE_PROFILING_ENABLE:NULL, NULL ); + + ADLASSERT( status == CL_SUCCESS ); + + // status = clSetCommandQueueProperty( commandQueue, CL_QUEUE_PROFILING_ENABLE, CL_TRUE, 0 ); + // CLASSERT( status == CL_SUCCESS ); + + if(0) + { + cl_bool image_support; + clGetDeviceInfo(deviceData->m_deviceIdx, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL); + debugPrintf(" CL_DEVICE_IMAGE_SUPPORT : %s\n", image_support?"Yes":"No"); + } + } + } + + m_kernelManager = new KernelManager; +} + +void DeviceCL::release() +{ + clReleaseCommandQueue( m_commandQueue ); + clReleaseContext( m_context ); + + if( m_kernelManager ) delete m_kernelManager; +} + +template +void DeviceCL::allocate(Buffer* buf, int nElems, BufferBase::BufferType type) +{ + buf->m_device = this; + buf->m_size = nElems; + buf->m_ptr = 0; + + if( type == BufferBase::BUFFER_CONST ) return; + +#if defined(ADL_CL_DUMP_MEMORY_LOG) + char deviceName[256]; + getDeviceName( deviceName ); + printf( "adlCLMemoryLog %s : %3.2fMB Allocation: %3.2fKB ", deviceName, m_memoryUsage/1024.f/1024.f, sizeof(T)*nElems/1024.f ); + fflush( stdout ); +#endif + + int sz=sizeof(T)*nElems; + + cl_int status = 0; + if( type == BufferBase::BUFFER_ZERO_COPY ) + buf->m_ptr = (T*)clCreateBuffer( 
m_context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sz, 0, &status ); + else if( type == BufferBase::BUFFER_RAW ) + buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_WRITE_ONLY, sz, 0, &status ); + else + buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_READ_WRITE, sz, 0, &status ); + + m_memoryUsage += buf->m_size*sizeof(T); +#if defined(ADL_CL_DUMP_MEMORY_LOG) + printf( "%s\n", (status==CL_SUCCESS)? "Succeed": "Failed" ); + fflush( stdout ); +#endif + ADLASSERT( status == CL_SUCCESS ); +} + +template +void DeviceCL::deallocate(Buffer* buf) +{ + if( buf->m_ptr ) + { + m_memoryUsage -= buf->m_size*sizeof(T); + clReleaseMemObject( (cl_mem)buf->m_ptr ); + } + buf->m_device = 0; + buf->m_size = 0; + buf->m_ptr = 0; +} + +template +void DeviceCL::copy(Buffer* dst, const Buffer* src, int nElems,int srcOffsetNElems,int dstOffsetNElems ) +{ + if( dst->m_device->m_type == TYPE_CL && src->m_device->m_type == TYPE_CL ) + { + cl_int status = 0; + status = clEnqueueCopyBuffer( m_commandQueue, (cl_mem)src->m_ptr, (cl_mem)dst->m_ptr, sizeof(T)*srcOffsetNElems, sizeof(T)*dstOffsetNElems, sizeof(T)*nElems, 0, 0, 0 ); + ADLASSERT( status == CL_SUCCESS ); + } + else if( src->m_device->m_type == TYPE_HOST ) + { + ADLASSERT( dst->getType() == TYPE_CL ); + dst->write( src->m_ptr, nElems ); + } + else if( dst->m_device->m_type == TYPE_HOST ) + { + ADLASSERT( src->getType() == TYPE_CL ); + src->read( dst->m_ptr, nElems ); + } + else + { + ADLASSERT( 0 ); + } +} + +template +void DeviceCL::copy(T* dst, const Buffer* src, int nElems, int srcOffsetNElems ) +{ + cl_int status = 0; + status = clEnqueueReadBuffer( m_commandQueue, (cl_mem)src->m_ptr, 0, sizeof(T)*srcOffsetNElems, sizeof(T)*nElems, + dst, 0,0,0 ); + ADLASSERT( status == CL_SUCCESS ); +} + +template +void DeviceCL::copy(Buffer* dst, const T* src, int nElems, int dstOffsetNElems ) +{ + cl_int status = 0; + int sz=sizeof(T)*nElems; + status = clEnqueueWriteBuffer( m_commandQueue, (cl_mem)dst->m_ptr, 0, 
sizeof(T)*dstOffsetNElems, sz, + src, 0,0,0 ); + ADLASSERT( status == CL_SUCCESS ); +} + +void DeviceCL::waitForCompletion() const +{ + clFinish( m_commandQueue ); +} + +int DeviceCL::getNDevices() +{ + cl_device_type deviceType = CL_DEVICE_TYPE_GPU; + cl_int status; + + cl_platform_id platform; + { + cl_uint nPlatforms = 0; + status = clGetPlatformIDs(0, NULL, &nPlatforms); + ADLASSERT( status == CL_SUCCESS ); + + cl_platform_id pIdx[5]; + status = clGetPlatformIDs(nPlatforms, pIdx, NULL); + ADLASSERT( status == CL_SUCCESS ); + + cl_uint nvIdx = -1; + cl_uint atiIdx = -1; + for(cl_uint i=0; iquery( this, fileName, funcName, option, src, cacheKernel ); +} + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlKernelUtilsCL.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlKernelUtilsCL.inl new file mode 100644 index 000000000..513478a35 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlKernelUtilsCL.inl @@ -0,0 +1,541 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + + + + +namespace adl +{ + +struct KernelCL : public Kernel +{ + cl_kernel& getKernel() { return (cl_kernel&)m_kernel; } +}; + +static const char* strip(const char* name, const char* pattern) +{ + size_t const patlen = strlen(pattern); + size_t patcnt = 0; + const char * oriptr; + const char * patloc; + // find how many times the pattern occurs in the original string + for (oriptr = name; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen) + { + patcnt++; + } + return oriptr; +} + +static bool isFileUpToDate(const char* binaryFileName,const char* srcFileName) + +{ + bool fileUpToDate = false; + + bool binaryFileValid=false; + FILETIME modtimeBinary; + + int nameLength = (int)strlen(binaryFileName)+1; +#ifdef UNICODE + WCHAR* fName = new WCHAR[nameLength]; + MultiByteToWideChar(CP_ACP,0,binaryFileName,-1, fName, nameLength); + HANDLE binaryFileHandle = CreateFile(fName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0); + delete [] fName; +#else + HANDLE binaryFileHandle = CreateFile(binaryFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0); +#endif + if (binaryFileHandle ==INVALID_HANDLE_VALUE) + { + DWORD errorCode; + errorCode = GetLastError(); + switch (errorCode) + { + case ERROR_FILE_NOT_FOUND: + { + debugPrintf("\nCached file not found %s\n", binaryFileName); + break; + } + case ERROR_PATH_NOT_FOUND: + { + debugPrintf("\nCached file path not found %s\n", binaryFileName); + break; + } + default: + { + debugPrintf("\nFailed reading cached file with errorCode = %d\n", errorCode); + } + } + } else + { + if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary)==0) + { + DWORD errorCode; + errorCode = GetLastError(); + debugPrintf("\nGetFileTime errorCode = %d\n", errorCode); + } else + { + binaryFileValid = true; + } + CloseHandle(binaryFileHandle); + } + + if (binaryFileValid) + { +#ifdef UNICODE + int nameLength = (int)strlen(srcFileName)+1; + WCHAR* fName = new WCHAR[nameLength]; + 
MultiByteToWideChar(CP_ACP,0,srcFileName,-1, fName, nameLength); + HANDLE srcFileHandle = CreateFile(fName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0); + delete [] fName; +#else + HANDLE srcFileHandle = CreateFile(srcFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0); +#endif + if (srcFileHandle!=INVALID_HANDLE_VALUE) + { + FILETIME modtimeSrc; + if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc)==0) + { + DWORD errorCode; + errorCode = GetLastError(); + debugPrintf("\nGetFileTime errorCode = %d\n", errorCode); + } + if ( ( modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime) + ||(( modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime)&&(modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime))) + { + fileUpToDate=true; + } else + { + debugPrintf("\nCached binary file found (%s), but out-of-date\n",binaryFileName); + } + CloseHandle(srcFileHandle); + } + else + { +#ifdef _DEBUG + DWORD errorCode; + errorCode = GetLastError(); + switch (errorCode) + { + case ERROR_FILE_NOT_FOUND: + { + debugPrintf("\nSrc file not found %s\n", srcFileName); + break; + } + case ERROR_PATH_NOT_FOUND: + { + debugPrintf("\nSrc path not found %s\n", srcFileName); + break; + } + default: + { + debugPrintf("\nnSrc file reading errorCode = %d\n", errorCode); + } + } + ADLASSERT(0); +#else + //if we cannot find the source, assume it is OK in release builds + fileUpToDate = true; +#endif + } + } + + + return fileUpToDate; +} + +template<> +void KernelBuilder::setFromFile( const Device* deviceData, const char* fileName, const char* option, bool addExtension, + bool cacheKernel) +{ + m_deviceData = deviceData; + + char fileNameWithExtension[256]; + + if( addExtension ) + sprintf_s( fileNameWithExtension, "%s.cl", fileName ); + else + sprintf_s( fileNameWithExtension, "%s", fileName ); + + class File + { + public: + __inline + bool open(const char* fileNameWithExtension) + { + size_t size; + char* str; + + // Open file stream + std::fstream 
f(fileNameWithExtension, (std::fstream::in | std::fstream::binary)); + + // Check if we have opened file stream + if (f.is_open()) { + size_t sizeFile; + // Find the stream size + f.seekg(0, std::fstream::end); + size = sizeFile = (size_t)f.tellg(); + f.seekg(0, std::fstream::beg); + + str = new char[size + 1]; + if (!str) { + f.close(); + return NULL; + } + + // Read file + f.read(str, sizeFile); + f.close(); + str[size] = '\0'; + + m_source = str; + + delete[] str; + + return true; + } + + return false; + } + const std::string& getSource() const {return m_source;} + + private: + std::string m_source; + }; + + cl_program& program = (cl_program&)m_ptr; + cl_int status = 0; + + bool cacheBinary = cacheKernel; +#if defined(ADL_CL_FORCE_UNCACHE_KERNEL) + cacheBinary = false; +#endif + + char binaryFileName[512]; + { + char deviceName[256]; + deviceData->getDeviceName(deviceName); + char driverVersion[256]; + const DeviceCL* dd = (const DeviceCL*) deviceData; + clGetDeviceInfo(dd->m_deviceIdx, CL_DRIVER_VERSION, 256, &driverVersion, NULL); + const char* strippedFileName = strip(fileName,"\\"); + strippedFileName = strip(strippedFileName,"/"); + + sprintf_s(binaryFileName,"cache/%s.%s.%s.bin",strippedFileName, deviceName,driverVersion ); + } + + bool upToDate = isFileUpToDate(binaryFileName,fileNameWithExtension); + + if( cacheBinary && upToDate) + { + FILE* file = fopen(binaryFileName, "rb"); + + if( file ) + { + fseek( file, 0L, SEEK_END ); + size_t binarySize = ftell( file ); + + rewind( file ); + char* binary = new char[binarySize]; + fread( binary, sizeof(char), binarySize, file ); + fclose( file ); + + if (binarySize) + { + const DeviceCL* dd = (const DeviceCL*) deviceData; + program = clCreateProgramWithBinary( dd->m_context, 1, &dd->m_deviceIdx, &binarySize, (const unsigned char**)&binary, 0, &status ); + ADLASSERT( status == CL_SUCCESS ); + status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, 0, 0 ); + ADLASSERT( status == CL_SUCCESS ); + if( status 
!= CL_SUCCESS ) + { + char *build_log; + size_t ret_val_size; + clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); + build_log = new char[ret_val_size+1]; + clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); + + build_log[ret_val_size] = '\0'; + + debugPrintf("%s\n", build_log); + + delete build_log; + ADLASSERT(0); + } + + } + } + } + if( !m_ptr ) + { + File kernelFile; + ADLASSERT( kernelFile.open( fileNameWithExtension ) ); + const char* source = kernelFile.getSource().c_str(); + setFromSrc( m_deviceData, source, option ); + + if( cacheBinary ) + { // write to binary + size_t binarySize; + status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 ); + ADLASSERT( status == CL_SUCCESS ); + + char* binary = new char[binarySize]; + + status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 ); + ADLASSERT( status == CL_SUCCESS ); + + { + FILE* file = fopen(binaryFileName, "wb"); + if (file) + { + fwrite( binary, sizeof(char), binarySize, file ); + fclose( file ); + } + } + + delete [] binary; + } + } +} + + + +template<> +void KernelBuilder::setFromSrcCached( const Device* deviceData, const char* src, const char* fileName, const char* option ) +{ + m_deviceData = deviceData; + + bool cacheBinary = true; + cl_program& program = (cl_program&)m_ptr; + cl_int status = 0; + + char binaryFileName[512]; + { + char deviceName[256]; + deviceData->getDeviceName(deviceName); + char driverVersion[256]; + const DeviceCL* dd = (const DeviceCL*) deviceData; + clGetDeviceInfo(dd->m_deviceIdx, CL_DRIVER_VERSION, 256, &driverVersion, NULL); + + const char* strippedFileName = strip(fileName,"\\"); + strippedFileName = strip(strippedFileName,"/"); + + sprintf_s(binaryFileName,"cache/%s.%s.%s.bin",strippedFileName, deviceName,driverVersion ); + } + + + char fileNameWithExtension[256]; + sprintf_s(fileNameWithExtension,"%s.cl",fileName, 
".cl"); + + bool upToDate = isFileUpToDate(binaryFileName,fileNameWithExtension); + + + if( cacheBinary ) + { + + bool fileUpToDate = isFileUpToDate(binaryFileName,fileNameWithExtension); + + if( fileUpToDate) + { + FILE* file = fopen(binaryFileName, "rb"); + if (file) + { + fseek( file, 0L, SEEK_END ); + size_t binarySize = ftell( file ); + rewind( file ); + char* binary = new char[binarySize]; + fread( binary, sizeof(char), binarySize, file ); + fclose( file ); + + const DeviceCL* dd = (const DeviceCL*) deviceData; + program = clCreateProgramWithBinary( dd->m_context, 1, &dd->m_deviceIdx, &binarySize, (const unsigned char**)&binary, 0, &status ); + ADLASSERT( status == CL_SUCCESS ); + status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, 0, 0 ); + ADLASSERT( status == CL_SUCCESS ); + + if( status != CL_SUCCESS ) + { + char *build_log; + size_t ret_val_size; + clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); + build_log = new char[ret_val_size+1]; + clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); + + build_log[ret_val_size] = '\0'; + + debugPrintf("%s\n", build_log); + + delete build_log; + ADLASSERT(0); + } + delete[] binary; + } + } + } + + + if( !m_ptr ) + { + + setFromSrc( deviceData, src, option ); + + if( cacheBinary ) + { // write to binary + cl_uint numAssociatedDevices; + status = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0 ); + ADLASSERT( status == CL_SUCCESS ); + if (numAssociatedDevices==1) + { + + + size_t binarySize; + status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 ); + ADLASSERT( status == CL_SUCCESS ); + + char* binary = new char[binarySize]; + + status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 ); + ADLASSERT( status == CL_SUCCESS ); + + { + FILE* file = fopen(binaryFileName, "wb"); + if (file) + { + fwrite( binary, 
sizeof(char), binarySize, file ); + fclose( file ); + } + } + + delete [] binary; + } + } + } +} + + +template<> +void KernelBuilder::setFromSrc( const Device* deviceData, const char* src, const char* option ) +{ + ADLASSERT( deviceData->m_type == TYPE_CL ); + m_deviceData = deviceData; + const DeviceCL* dd = (const DeviceCL*) deviceData; + + cl_program& program = (cl_program&)m_ptr; + cl_int status = 0; + size_t srcSize[] = {strlen( src )}; + program = clCreateProgramWithSource( dd->m_context, 1, &src, srcSize, &status ); + ADLASSERT( status == CL_SUCCESS ); + status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, NULL, NULL ); + if( status != CL_SUCCESS ) + { + char *build_log; + size_t ret_val_size; + clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); + build_log = new char[ret_val_size+1]; + clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); + + build_log[ret_val_size] = '\0'; + + debugPrintf("%s\n", build_log); + printf("%s\n", build_log); + + ADLASSERT(0); + delete build_log; + + } +} + +template<> +KernelBuilder::~KernelBuilder() +{ + cl_program program = (cl_program)m_ptr; + clReleaseProgram( program ); +} + +template<> +void KernelBuilder::createKernel( const char* funcName, Kernel& kernelOut ) +{ + KernelCL* clKernel = (KernelCL*)&kernelOut; + + cl_program program = (cl_program)m_ptr; + cl_int status = 0; + clKernel->getKernel() = clCreateKernel(program, funcName, &status ); + ADLASSERT( status == CL_SUCCESS ); + + kernelOut.m_type = TYPE_CL; +} + +template<> +void KernelBuilder::deleteKernel( Kernel& kernel ) +{ + KernelCL* clKernel = (KernelCL*)&kernel; + clReleaseKernel( clKernel->getKernel() ); +} + + + +class LauncherCL +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + __inline + static void setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n ); + template + __inline + static void setConst( Launcher* launcher, Buffer& constBuff, 
const T& consts ); + __inline + static void launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY ); +}; + +void LauncherCL::setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n ) +{ + KernelCL* clKernel = (KernelCL*)launcher->m_kernel; + for(int i=0; i* buff = (Buffer*)buffInfo[i].m_buffer; + cl_int status = clSetKernelArg( clKernel->getKernel(), launcher->m_idx++, sizeof(cl_mem), &buff->m_ptr ); + ADLASSERT( status == CL_SUCCESS ); + } +} + +template +void LauncherCL::setConst( Launcher* launcher, Buffer& constBuff, const T& consts ) +{ + KernelCL* clKernel = (KernelCL*)launcher->m_kernel; + int sz=sizeof(T); + cl_int status = clSetKernelArg( clKernel->getKernel(), launcher->m_idx++, sz, &consts ); + ADLASSERT( status == CL_SUCCESS ); +} + +void LauncherCL::launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY ) +{ + KernelCL* clKernel = (KernelCL*)launcher->m_kernel; + const DeviceCL* ddcl = (const DeviceCL*)launcher->m_deviceData; + size_t gRange[3] = {1,1,1}; + size_t lRange[3] = {1,1,1}; + lRange[0] = localSizeX; + lRange[1] = localSizeY; + gRange[0] = max((size_t)1, (numThreadsX/lRange[0])+(!(numThreadsX%lRange[0])?0:1)); + gRange[0] *= lRange[0]; + gRange[1] = max((size_t)1, (numThreadsY/lRange[1])+(!(numThreadsY%lRange[1])?0:1)); + gRange[1] *= lRange[1]; + + cl_int status = clEnqueueNDRangeKernel( ddcl->m_commandQueue, + clKernel->getKernel(), 2, NULL, gRange, lRange, 0,0,0 ); + ADLASSERT( status == CL_SUCCESS ); +} + + +}; \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/DX11/AdlDX11.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/DX11/AdlDX11.inl new file mode 100644 index 000000000..66abde98e --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/DX11/AdlDX11.inl @@ -0,0 +1,512 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. 
+ +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#include +#include +#include +#include +#include +#pragma comment(lib,"d3dx11.lib") +#pragma comment(lib,"d3d11.lib") +#pragma comment(lib,"DXGI.lib") + +namespace adl +{ + +#define u32 unsigned int + +struct DeviceDX11 : public Device +{ + typedef DeviceUtils::Config Config; + + + __inline + DeviceDX11() : Device( TYPE_DX11 ), m_kernelManager(0){} + __inline + void* getContext() const { return m_context; } + __inline + void initialize(const Config& cfg); + __inline + void release(); + + template + __inline + void allocate(Buffer* buf, int nElems, BufferBase::BufferType type); + + template + __inline + void deallocate(Buffer* buf); + + template + __inline + void copy(Buffer* dst, const Buffer* src, int nElems); + + template + __inline + void copy(T* dst, const Buffer* src, int nElems, int srcOffsetNElems = 0); + + template + __inline + void copy(Buffer* dst, const T* src, int nElems, int dstOffsetNElems = 0); + + __inline + void waitForCompletion() const; + + __inline + void getDeviceName( char nameOut[128] ) const; + + __inline + static + int getNDevices(); + + __inline + Kernel* getKernel(const char* fileName, 
const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true )const; + + + ID3D11DeviceContext* m_context; + ID3D11Device* m_device; + IDXGISwapChain* m_swapChain; + + KernelManager* m_kernelManager; +}; + +template +struct BufferDX11 : public Buffer +{ + ID3D11Buffer* getBuffer() { return (ID3D11Buffer*)m_ptr; } + ID3D11UnorderedAccessView* getUAV() { return (ID3D11UnorderedAccessView*)m_uav; } + ID3D11ShaderResourceView* getSRV() { return (ID3D11ShaderResourceView*)m_srv; } + + ID3D11Buffer** getBufferPtr() { return (ID3D11Buffer**)&m_ptr; } + ID3D11UnorderedAccessView** getUAVPtr() { return (ID3D11UnorderedAccessView**)&m_uav; } + ID3D11ShaderResourceView** getSRVPtr() { return (ID3D11ShaderResourceView**)&m_srv; } +}; + +#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } + + +void DeviceDX11::initialize(const Config& cfg) +{ + DeviceDX11* deviceData = this; + + HRESULT hr = S_OK; + UINT createDeviceFlg = 0; +#ifdef _DEBUG + createDeviceFlg |= D3D11_CREATE_DEVICE_DEBUG; +#endif + D3D_FEATURE_LEVEL fl[] = { + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0 + }; + +typedef HRESULT (WINAPI * LPD3D11CREATEDEVICE)( IDXGIAdapter*, D3D_DRIVER_TYPE, HMODULE, u32, D3D_FEATURE_LEVEL*, UINT, u32, ID3D11Device**, D3D_FEATURE_LEVEL*, ID3D11DeviceContext** ); + + HMODULE moduleD3D11 = 0; +#ifdef UNICODE + moduleD3D11 = LoadLibrary( L"d3d11.dll" ); +#else + moduleD3D11 = LoadLibrary( "d3d11.dll" ); +#endif + ADLASSERT( moduleD3D11 ); + + LPD3D11CREATEDEVICE _DynamicD3D11CreateDevice; + _DynamicD3D11CreateDevice = ( LPD3D11CREATEDEVICE )GetProcAddress( moduleD3D11, "D3D11CreateDevice" ); + + D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_HARDWARE; + // http://msdn.microsoft.com/en-us/library/ff476082(v=VS.85).aspx + // If you set the pAdapter parameter to a non-NULL value, you must also set the DriverType parameter to the D3D_DRIVER_TYPE_UNKNOWN value. 
If you set the pAdapter parameter to a non-NULL value and the DriverType parameter to the D3D_DRIVER_TYPE_HARDWARE value, D3D11CreateDevice returns an HRESULT of E_INVALIDARG. + type = D3D_DRIVER_TYPE_UNKNOWN; +/* + // Create a hardware Direct3D 11 device + hr = _DynamicD3D11CreateDevice( NULL, + type, NULL, createDeviceFlg, + fl, _countof(fl), D3D11_SDK_VERSION, &deviceData->m_device, NULL, &deviceData->m_context ); +*/ + IDXGIAdapter* adapter = NULL; + {// get adapter of the index + IDXGIFactory* factory = NULL; + int targetAdapterIdx = cfg.m_deviceIdx;//min( cfg.m_deviceIdx, getNDevices()-1 ); + CreateDXGIFactory( __uuidof(IDXGIFactory), (void**)&factory ); + + u32 i = 0; + while( factory->EnumAdapters( i, &adapter ) != DXGI_ERROR_NOT_FOUND ) + { + if( i== targetAdapterIdx ) break; + i++; + } + factory->Release(); + } + + // Create a hardware Direct3D 11 device + hr = D3D11CreateDevice( adapter, + type, + NULL, createDeviceFlg, + fl, _countof(fl), D3D11_SDK_VERSION, &deviceData->m_device, NULL, &deviceData->m_context ); + + ADLASSERT( hr == S_OK ); + + // Check if the hardware device supports Compute Shader 4.0 + D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS hwopts; + deviceData->m_device->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &hwopts, sizeof(hwopts)); + + if( !hwopts.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x ) + { + SAFE_RELEASE( deviceData->m_context ); + SAFE_RELEASE( deviceData->m_device ); + + debugPrintf("DX11 GPU is not present\n"); + ADLASSERT( 0 ); + } + + m_kernelManager = new KernelManager; +} + +void DeviceDX11::release() +{ + SAFE_RELEASE( m_context ); + SAFE_RELEASE( m_device ); + + if( m_kernelManager ) delete m_kernelManager; +} + +template +void DeviceDX11::allocate(Buffer* buf, int nElems, BufferBase::BufferType type) +{ + ADLASSERT( type != BufferBase::BUFFER_ZERO_COPY ); + + DeviceDX11* deviceData = this; + buf->m_device = deviceData; + buf->m_size = nElems; + BufferDX11* dBuf = (BufferDX11*)buf; + +// if( 
type & BufferBase::BUFFER ) + { + HRESULT hr = S_OK; + + if( type == BufferBase::BUFFER_CONST ) + { + ADLASSERT( nElems == 1 ); + D3D11_BUFFER_DESC constant_buffer_desc; + ZeroMemory( &constant_buffer_desc, sizeof(constant_buffer_desc) ); +// constant_buffer_desc.ByteWidth = NEXTMULTIPLEOF( sizeof(T), 16 ); + constant_buffer_desc.ByteWidth = (((sizeof(T))/(16) + (((sizeof(T))%(16)==0)?0:1))*(16)); +// constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC; +// constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; +// constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + constant_buffer_desc.Usage = D3D11_USAGE_DEFAULT; + constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + constant_buffer_desc.CPUAccessFlags = 0; + + hr = deviceData->m_device->CreateBuffer( &constant_buffer_desc, NULL, dBuf->getBufferPtr() ); + ADLASSERT( hr == S_OK ); + return; + } + + D3D11_BUFFER_DESC buffer_desc; + ZeroMemory(&buffer_desc, sizeof(buffer_desc)); + buffer_desc.ByteWidth = nElems * sizeof(T); + + if( type != BufferBase::BUFFER_RAW ) + { + buffer_desc.StructureByteStride = sizeof(T); +// buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + } + + if( type == BufferBase::BUFFER_STAGING ) + { + buffer_desc.Usage = D3D11_USAGE_STAGING; + buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + } + else if( type == BufferBase::BUFFER_INDEX ) + { + buffer_desc.Usage = D3D11_USAGE_DEFAULT; + buffer_desc.BindFlags = D3D11_BIND_INDEX_BUFFER; + } + else if( type == BufferBase::BUFFER_VERTEX ) + { + buffer_desc.Usage = D3D11_USAGE_DEFAULT; + buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + } + else + { + buffer_desc.Usage = D3D11_USAGE_DEFAULT; + + buffer_desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; + buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + +// check this + if(type == BufferBase::BUFFER_RAW) + { +// buffer_desc.BindFlags |= D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER; + buffer_desc.MiscFlags = 
D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS | D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; // need this to be used for DispatchIndirect + } + } + hr = deviceData->m_device->CreateBuffer(&buffer_desc, NULL, dBuf->getBufferPtr()); + + ADLASSERT( hr == S_OK ); + + if( type == BufferBase::BUFFER_INDEX ) return; + + if( type == BufferBase::BUFFER || + type == BufferBase::BUFFER_RAW || + type == BufferBase::BUFFER_W_COUNTER ) + { + // Create UAVs for all CS buffers + D3D11_UNORDERED_ACCESS_VIEW_DESC uavbuffer_desc; + ZeroMemory(&uavbuffer_desc, sizeof(uavbuffer_desc)); + uavbuffer_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; + + if( type == BufferBase::BUFFER_RAW ) + { + uavbuffer_desc.Format = DXGI_FORMAT_R32_TYPELESS; + uavbuffer_desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW; + uavbuffer_desc.Buffer.NumElements = buffer_desc.ByteWidth / 4; + } + else + { + uavbuffer_desc.Format = DXGI_FORMAT_UNKNOWN; + uavbuffer_desc.Buffer.NumElements = nElems; + } + + if( type == BufferBase::BUFFER_W_COUNTER ) + { + uavbuffer_desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_COUNTER; + } + + hr = deviceData->m_device->CreateUnorderedAccessView(dBuf->getBuffer(), &uavbuffer_desc, dBuf->getUAVPtr()); + ADLASSERT( hr == S_OK ); + + // Create SRVs for all CS buffers + D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc; + ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc)); + if( type == BufferBase::BUFFER_RAW ) + { + ADLASSERT( sizeof(T) <= 16 ); + srvbuffer_desc.Format = DXGI_FORMAT_R32_UINT; + srvbuffer_desc.Buffer.ElementWidth = nElems; +// if ( buffer_desc.MiscFlags & D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS ) +// { +// srvbuffer_desc.Format = DXGI_FORMAT_R32_TYPELESS; +// srvbuffer_desc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW; +// srvbuffer_desc.BufferEx.NumElements = buffer_desc.ByteWidth / 4; + } + else + { + srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN; + srvbuffer_desc.Buffer.ElementWidth = nElems; + } + srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + + hr = 
deviceData->m_device->CreateShaderResourceView(dBuf->getBuffer(), &srvbuffer_desc, dBuf->getSRVPtr()); + ADLASSERT( hr == S_OK ); + } + else if( type == BufferBase::BUFFER_APPEND ) + { + D3D11_UNORDERED_ACCESS_VIEW_DESC desc; + ZeroMemory( &desc, sizeof(desc) ); + desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; + desc.Buffer.FirstElement = 0; + + desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_APPEND; + + desc.Format = DXGI_FORMAT_UNKNOWN; // Format must be must be DXGI_FORMAT_UNKNOWN, when creating a View of a Structured Buffer + desc.Buffer.NumElements = buffer_desc.ByteWidth / buffer_desc.StructureByteStride; + + hr = deviceData->m_device->CreateUnorderedAccessView( dBuf->getBuffer(), &desc, dBuf->getUAVPtr() ); + ADLASSERT( hr == S_OK ); + } + } +// else +// { +// ADLASSERT(0); +// } +} + +template +void DeviceDX11::deallocate(Buffer* buf) +{ + BufferDX11* dBuf = (BufferDX11*)buf; + + if( dBuf->getBuffer() ) + { + dBuf->getBuffer()->Release(); + dBuf->m_ptr = NULL; + } + if( dBuf->getUAV() ) + { + dBuf->getUAV()->Release(); + dBuf->m_uav = NULL; + } + if( dBuf->getSRV() ) + { + dBuf->getSRV()->Release(); + dBuf->m_srv = NULL; + } + buf->m_device = 0; +} + +template +void DeviceDX11::copy(Buffer* dst, const Buffer* src, int nElems) +{ + if( dst->m_device->m_type == TYPE_DX11 || src->m_device->m_type == TYPE_DX11 ) + { + DeviceDX11* deviceData = this; + BufferDX11* dDst = (BufferDX11*)dst; + BufferDX11* dSrc = (BufferDX11*)src; + + D3D11_MAPPED_SUBRESOURCE MappedVelResource = {0}; + + D3D11_BOX destRegion; + destRegion.left = 0*sizeof(T); + destRegion.front = 0; + destRegion.top = 0; + destRegion.bottom = 1; + destRegion.back = 1; + destRegion.right = (0+nElems)*sizeof(T); + + deviceData->m_context->CopySubresourceRegion( + dDst->getBuffer(), + 0, 0, 0, 0, + dSrc->getBuffer(), + 0, + &destRegion ); + + } + else if( src->m_device->m_type == TYPE_HOST ) + { + ADLASSERT( dst->getType() == TYPE_DX11 ); + dst->write( src->m_ptr, nElems ); + } + else if( 
dst->m_device->m_type == TYPE_HOST ) + { + ADLASSERT( src->getType() == TYPE_DX11 ); + src->read( dst->m_ptr, nElems ); + } + else + { + ADLASSERT( 0 ); + } +} + +template +void DeviceDX11::copy(T* dst, const Buffer* src, int nElems, int srcOffsetNElems) +{ + DeviceDX11* deviceData = this; + BufferDX11* dSrc = (BufferDX11*)src; + Buffer sBuf( deviceData, nElems, BufferBase::BUFFER_STAGING ); + BufferDX11* dStagingBuf = (BufferDX11*)&sBuf; + + + ID3D11Buffer *StagingBuffer = dStagingBuf->getBuffer(); + D3D11_MAPPED_SUBRESOURCE MappedVelResource = {0}; + + D3D11_BOX destRegion; + destRegion.left = srcOffsetNElems*sizeof(T); + destRegion.front = 0; + destRegion.top = 0; + destRegion.bottom = 1; + destRegion.back = 1; + destRegion.right = (srcOffsetNElems+nElems)*sizeof(T); + + deviceData->m_context->CopySubresourceRegion( + StagingBuffer, + 0, 0, 0, 0, + dSrc->getBuffer(), + 0, + &destRegion); + + deviceData->m_context->Map(StagingBuffer, 0, D3D11_MAP_READ, 0, &MappedVelResource); + memcpy(dst, MappedVelResource.pData, nElems*sizeof(T)); + deviceData->m_context->Unmap(StagingBuffer, 0); +} + +template +void DeviceDX11::copy(Buffer* dst, const T* src, int nElems, int dstOffsetNElems) +{ + BufferDX11* dBuf = (BufferDX11*)dst; + + DeviceDX11* deviceData = this; + + D3D11_BOX destRegion; + destRegion.left = dstOffsetNElems*sizeof(T); + destRegion.front = 0; + destRegion.top = 0; + destRegion.bottom = 1; + destRegion.back = 1; + destRegion.right = (dstOffsetNElems+nElems)*sizeof(T); + deviceData->m_context->UpdateSubresource(dBuf->getBuffer(), 0, &destRegion, src, 0, 0); +} + +void DeviceDX11::waitForCompletion() const +{ + const DeviceDX11* deviceData = this; + + ID3D11Query* syncQuery; + D3D11_QUERY_DESC qDesc; + qDesc.Query = D3D11_QUERY_EVENT; + qDesc.MiscFlags = 0; + deviceData->m_device->CreateQuery( &qDesc, &syncQuery ); + deviceData->m_context->End( syncQuery ); + while( deviceData->m_context->GetData( syncQuery, 0,0,0 ) == S_FALSE ){} + syncQuery->Release(); +} + 
+int DeviceDX11::getNDevices() +{ + IDXGIFactory1* factory = NULL; + IDXGIAdapter1* adapter = NULL; + CreateDXGIFactory1( __uuidof(IDXGIFactory1), (void**)&factory ); + + u32 i = 0; + while( factory->EnumAdapters1( i, &adapter ) != DXGI_ERROR_NOT_FOUND ) + { + i++; + } + + factory->Release(); + return i; +} + +void DeviceDX11::getDeviceName( char nameOut[128] ) const +{ + IDXGIAdapter* adapter;// = getAdapterFromDevice( this ); + { + IDXGIDevice* pDXGIDevice; + + ADLASSERT( m_device->QueryInterface(__uuidof(IDXGIDevice), (void **)&pDXGIDevice) == S_OK ); + ADLASSERT( pDXGIDevice->GetParent(__uuidof(IDXGIAdapter), (void **)&adapter) == S_OK ); + + pDXGIDevice->Release(); + } + DXGI_ADAPTER_DESC adapterDesc; + adapter->GetDesc( &adapterDesc ); + +// wcstombs( nameOut, adapterDesc.Description, 128 ); + size_t i; + wcstombs_s( &i, nameOut, 128, adapterDesc.Description, 128 ); +} + +Kernel* DeviceDX11::getKernel(const char* fileName, const char* funcName, const char* option, const char* src, bool cacheKernel ) const +{ + return m_kernelManager->query( this, fileName, funcName, option, src, cacheKernel ); +} + +#undef u32 + +#undef SAFE_RELEASE + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/DX11/AdlKernelUtilsDX11.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/DX11/AdlKernelUtilsDX11.inl new file mode 100644 index 000000000..d4e29999d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/DX11/AdlKernelUtilsDX11.inl @@ -0,0 +1,348 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + + +namespace adl +{ + +#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } + +struct KernelDX11 : public Kernel +{ + ID3D11ComputeShader* getKernel() { return (ID3D11ComputeShader*)m_kernel; } + ID3D11ComputeShader** getKernelPtr() { return (ID3D11ComputeShader**)&m_kernel; } +}; + + +__inline +#ifdef UNICODE +HRESULT FindDXSDKShaderFileCch( __in_ecount(cchDest) WCHAR* strDestPath, + int cchDest, + __in LPCWSTR strFilename ) +#else +HRESULT FindDXSDKShaderFileCch( __in_ecount(cchDest) CHAR* strDestPath, + int cchDest, + __in LPCSTR strFilename ) +#endif +{ + if( NULL == strFilename || strFilename[0] == 0 || NULL == strDestPath || cchDest < 10 ) + return E_INVALIDARG; + + // Get the exe name, and exe path +#ifdef UNICODE + WCHAR strExePath[MAX_PATH] = +#else + CHAR strExePath[MAX_PATH] = +#endif + { + 0 + }; +#ifdef UNICODE + WCHAR strExeName[MAX_PATH] = +#else + CHAR strExeName[MAX_PATH] = +#endif + { + 0 + }; +#ifdef UNICODE + WCHAR* strLastSlash = NULL; +#else + CHAR* strLastSlash = NULL; +#endif + GetModuleFileName( NULL, strExePath, MAX_PATH ); + strExePath[MAX_PATH - 1] = 0; +#ifdef UNICODE + strLastSlash = wcsrchr( strExePath, TEXT( '\\' ) ); +#else + strLastSlash = strrchr( strExePath, TEXT( '\\' ) ); +#endif + if( strLastSlash ) + { +#ifdef UNICODE + wcscpy_s( strExeName, MAX_PATH, &strLastSlash[1] ); +#else + +#endif + // Chop the exe name from the exe path + *strLastSlash = 0; + + // Chop the .exe from the exe name +#ifdef UNICODE + strLastSlash = wcsrchr( 
strExeName, TEXT( '.' ) ); +#else + strLastSlash = strrchr( strExeName, TEXT( '.' ) ); +#endif + if( strLastSlash ) + *strLastSlash = 0; + } + + // Search in directories: + // .\ + // %EXE_DIR%\..\..\%EXE_NAME% +#ifdef UNICODE + wcscpy_s( strDestPath, cchDest, strFilename ); +#else + strcpy_s( strDestPath, cchDest, strFilename ); +#endif + if( GetFileAttributes( strDestPath ) != 0xFFFFFFFF ) + return S_OK; + +// swprintf_s( strDestPath, cchDest, L"%s\\..\\..\\%s\\%s", strExePath, strExeName, strFilename ); +#ifdef UNICODE + swprintf_s( strDestPath, cchDest, L"%s\\..\\%s\\%s", strExePath, strExeName, strFilename ); +#else + sprintf_s( strDestPath, cchDest, "%s\\..\\%s\\%s", strExePath, strExeName, strFilename ); +#endif + if( GetFileAttributes( strDestPath ) != 0xFFFFFFFF ) + return S_OK; + + // On failure, return the file as the path but also return an error code +#ifdef UNICODE + wcscpy_s( strDestPath, cchDest, strFilename ); +#else + strcpy_s( strDestPath, cchDest, strFilename ); +#endif + + ADLASSERT( 0 ); + + return E_FAIL; +} + + + + +template<> +void KernelBuilder::setFromFile( const Device* deviceData, const char* fileName, const char* option, bool addExtension, + bool cacheKernel) +{ + char fileNameWithExtension[256]; + + if( addExtension ) + sprintf_s( fileNameWithExtension, "%s.hlsl", fileName ); + else + sprintf_s( fileNameWithExtension, "%s", fileName ); + + m_deviceData = deviceData; + + int nameLength = (int)strlen(fileNameWithExtension)+1; +#ifdef UNICODE + WCHAR* wfileNameWithExtension = new WCHAR[nameLength]; +#else + CHAR* wfileNameWithExtension = new CHAR[nameLength]; +#endif + memset(wfileNameWithExtension,0,nameLength); +#ifdef UNICODE + MultiByteToWideChar(CP_ACP,0,fileNameWithExtension,-1, wfileNameWithExtension, nameLength); +#else + sprintf_s(wfileNameWithExtension, nameLength, "%s", fileNameWithExtension); +#endif +// swprintf_s(wfileNameWithExtension, nameLength*2, L"%s", fileNameWithExtension); + + HRESULT hr; + + // Finds the correct 
path for the shader file. + // This is only required for this sample to be run correctly from within the Sample Browser, + // in your own projects, these lines could be removed safely + hr = FindDXSDKShaderFileCch( m_path, MAX_PATH, wfileNameWithExtension ); + + delete [] wfileNameWithExtension; + + ADLASSERT( hr == S_OK ); +} + +template<> +void KernelBuilder::setFromSrc( const Device* deviceData, const char* src, const char* option ) +{ + m_deviceData = deviceData; + m_ptr = (void*)src; + m_path[0] = '0'; +} + +template<> +KernelBuilder::~KernelBuilder() +{ + +} + +template<> +void KernelBuilder::createKernel( const char* funcName, Kernel& kernelOut ) +{ + const DeviceDX11* deviceData = (const DeviceDX11*)m_deviceData; + KernelDX11* dxKernel = (KernelDX11*)&kernelOut; + HRESULT hr; + + DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS; +#if defined( DEBUG ) || defined( _DEBUG ) + // Set the D3DCOMPILE_DEBUG flag to embed debug information in the shaders. + // Setting this flag improves the shader debugging experience, but still allows + // the shaders to be optimized and to run exactly the way they will run in + // the release configuration of this program. + dwShaderFlags |= D3DCOMPILE_DEBUG; +#endif + + const D3D_SHADER_MACRO defines[] = + { +#ifdef USE_STRUCTURED_BUFFERS + "USE_STRUCTURED_BUFFERS", "1", +#endif + +#ifdef TEST_DOUBLE + "TEST_DOUBLE", "1", +#endif + NULL, NULL + }; + + // We generally prefer to use the higher CS shader profile when possible as CS 5.0 is better performance on 11-class hardware + LPCSTR pProfile = ( deviceData->m_device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ) ? 
"cs_5_0" : "cs_4_0"; + + ID3DBlob* pErrorBlob = NULL; + ID3DBlob* pBlob = NULL; + if( m_path[0] == '0' ) + { + char* src = (char*)m_ptr; + hr = D3DX11CompileFromMemory( src, strlen(src), 0, defines, NULL, funcName, pProfile, + dwShaderFlags, NULL, NULL, &pBlob, &pErrorBlob, NULL ); + } + else + { + hr = D3DX11CompileFromFile( m_path, defines, NULL, funcName, pProfile, + dwShaderFlags, NULL, NULL, &pBlob, &pErrorBlob, NULL ); + } + + if ( FAILED(hr) ) + { + debugPrintf("%s", (char*)pErrorBlob->GetBufferPointer()); + } + ADLASSERT( hr == S_OK ); + + hr = deviceData->m_device->CreateComputeShader( pBlob->GetBufferPointer(), pBlob->GetBufferSize(), NULL, + dxKernel->getKernelPtr() ); + +#if defined(DEBUG) || defined(PROFILE) + if ( kernelOut.m_kernel ) + kernelOut.m_kernel->SetPrivateData( WKPDID_D3DDebugObjectName, lstrlenA(pFunctionName), pFunctionName ); +#endif + + SAFE_RELEASE( pErrorBlob ); + SAFE_RELEASE( pBlob ); + + kernelOut.m_type = TYPE_DX11; +} + +template<> +void KernelBuilder::deleteKernel( Kernel& kernel ) +{ + KernelDX11* dxKernel = (KernelDX11*)&kernel; + + if( kernel.m_kernel ) + { + dxKernel->getKernel()->Release(); + kernel.m_kernel = NULL; + } +} + + + +class LauncherDX11 +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + __inline + static void setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n ); + template + __inline + static void setConst( Launcher* launcher, Buffer& constBuff, const T& consts ); + __inline + static void launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY ); +}; + +void LauncherDX11::setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n ) +{ + KernelDX11* dxKernel = (KernelDX11*)launcher->m_kernel; + const DeviceDX11* dddx = (const DeviceDX11*)launcher->m_deviceData; + + for(int i=0; i* dBuf = (BufferDX11*)buffInfo[i].m_buffer; + if( buffInfo[i].m_isReadOnly ) + { + dddx->m_context->CSSetShaderResources( launcher->m_idx++, 1, dBuf->getSRVPtr() ); + } + else + { 
+ // todo. cannot initialize append buffer with proper counter value which is the last arg + dddx->m_context->CSSetUnorderedAccessViews( launcher->m_idxRw++, 1, dBuf->getUAVPtr(), 0 ); + } + } +} + +template +void LauncherDX11::setConst( Launcher* launcher, Buffer& constBuff, const T& consts ) +{ + KernelDX11* dxKernel = (KernelDX11*)launcher->m_kernel; + const DeviceDX11* dddx = (const DeviceDX11*)launcher->m_deviceData; + BufferDX11* dBuf = (BufferDX11*)&constBuff; +/* + D3D11_MAPPED_SUBRESOURCE MappedResource; + dddx->m_context->Map( dBuf->getBuffer(), 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &consts, sizeof(T) ); + dddx->m_context->Unmap( dBuf->getBuffer(), 0 ); +*/ + + dddx->m_context->UpdateSubresource( dBuf->getBuffer(), 0, NULL, &consts, 0, 0 ); + + dddx->m_context->CSSetConstantBuffers( 0, 1, dBuf->getBufferPtr() ); +} + +void LauncherDX11::launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY ) +{ + KernelDX11* dxKernel = (KernelDX11*)launcher->m_kernel; + const DeviceDX11* dddx = (const DeviceDX11*)launcher->m_deviceData; + + dddx->m_context->CSSetShader( dxKernel->getKernel(), NULL, 0 ); + + int nx, ny, nz; + nx = max( 1, (numThreadsX/localSizeX)+(!(numThreadsX%localSizeX)?0:1) ); + ny = max( 1, (numThreadsY/localSizeY)+(!(numThreadsY%localSizeY)?0:1) ); + nz = 1; + + dddx->m_context->Dispatch( nx, ny, nz ); + + // set 0 to registers + { + dddx->m_context->CSSetShader( NULL, NULL, 0 ); + + if( launcher->m_idxRw ) + { + ID3D11UnorderedAccessView* aUAViewsNULL[ 16 ] = { 0 }; + dddx->m_context->CSSetUnorderedAccessViews( 0, + min( (unsigned int)launcher->m_idxRw, sizeof(aUAViewsNULL)/sizeof(*aUAViewsNULL) ), aUAViewsNULL, NULL ); + } + + if( launcher->m_idx ) + { + ID3D11ShaderResourceView* ppSRVNULL[16] = { 0 }; + dddx->m_context->CSSetShaderResources( 0, + min( (unsigned int)launcher->m_idx, sizeof(ppSRVNULL)/sizeof(*ppSRVNULL) ), ppSRVNULL ); + } + } +} + +#undef 
SAFE_RELEASE + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/DX11/AdlStopwatchDX11.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/DX11/AdlStopwatchDX11.inl new file mode 100644 index 000000000..15b79aac5 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/DX11/AdlStopwatchDX11.inl @@ -0,0 +1,131 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + + +namespace adl +{ + +struct StopwatchDX11 : public StopwatchBase +{ + public: + __inline + StopwatchDX11() : StopwatchBase(){} + __inline + ~StopwatchDX11(); + + __inline + void init( const Device* deviceData ); + __inline + void start(); + __inline + void split(); + __inline + void stop(); + __inline + float getMs(int index=0); + __inline + void getMs( float* times, int capacity ); + + public: + ID3D11Query* m_tQuery[CAPACITY+1]; + ID3D11Query* m_fQuery; + UINT64 m_t[CAPACITY]; +}; + +void StopwatchDX11::init( const Device* deviceData ) +{ + ADLASSERT( deviceData->m_type == TYPE_DX11 ); + m_device = deviceData; + { + D3D11_QUERY_DESC qDesc; + qDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; + qDesc.MiscFlags = 0; + ((const DeviceDX11*)m_device)->m_device->CreateQuery( &qDesc, &m_fQuery ); + } + for(int i=0; im_device->CreateQuery( &qDesc, &m_tQuery[i] ); + } +} + +StopwatchDX11::~StopwatchDX11() +{ + m_fQuery->Release(); + for(int i=0; iRelease(); + } +} + +void StopwatchDX11::start() +{ + m_idx = 0; + ((const DeviceDX11*)m_device)->m_context->Begin( m_fQuery ); + ((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] ); +} + +void StopwatchDX11::split() +{ + if( m_idx < CAPACITY ) + ((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] ); +} + +void StopwatchDX11::stop() +{ + ((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] ); + ((const DeviceDX11*)m_device)->m_context->End( m_fQuery ); +} + +float StopwatchDX11::getMs(int index) +{ + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT d; +// m_deviceData->m_context->End( m_fQuery ); + while( ((const DeviceDX11*)m_device)->m_context->GetData( m_fQuery, &d,sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT),0 ) == S_FALSE ) {} + + while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[0], &m_t[index],sizeof(UINT64),0 ) == S_FALSE ){} + while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[1], &m_t[index+1],sizeof(UINT64),0 ) == 
S_FALSE ){} + + ADLASSERT( d.Disjoint == false ); + + float elapsedMs = (m_t[index+1] - m_t[index])/(float)d.Frequency*1000; + return elapsedMs; + +} + +void StopwatchDX11::getMs( float* times, int capacity ) +{ + ADLASSERT( capacity <= CAPACITY ); + + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT d; + while( ((const DeviceDX11*)m_device)->m_context->GetData( m_fQuery, &d,sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT),0 ) == S_FALSE ) {} + + for(int i=0; im_context->GetData( m_tQuery[i], &m_t[i],sizeof(UINT64),0 ) == S_FALSE ){} + } + + ADLASSERT( d.Disjoint == false ); + + for(int i=0; i + __inline + void allocate(Buffer* buf, int nElems, BufferBase::BufferType type); + + template + __inline + void deallocate(Buffer* buf); + + template + __inline + void copy(Buffer* dst, const Buffer* src, int nElems); + + template + __inline + void copy(T* dst, const Buffer* src, int nElems, int offsetNElems = 0); + + template + __inline + void copy(Buffer* dst, const T* src, int nElems, int offsetNElems = 0); + + __inline + void waitForCompletion() const; +}; + +void DeviceHost::initialize(const Config& cfg) +{ + +} + +void DeviceHost::release() +{ + +} + +template +void DeviceHost::allocate(Buffer* buf, int nElems, BufferBase::BufferType type) +{ + buf->m_device = this; + + if( type == BufferBase::BUFFER_CONST ) return; + + buf->m_ptr = new T[nElems]; + ADLASSERT( buf->m_ptr ); + buf->m_size = nElems; +} + +template +void DeviceHost::deallocate(Buffer* buf) +{ + if( buf->m_ptr ) delete [] buf->m_ptr; +} + +template +void DeviceHost::copy(Buffer* dst, const Buffer* src, int nElems) +{ + copy( dst, src->m_ptr, nElems ); +} + +template +void DeviceHost::copy(T* dst, const Buffer* src, int nElems, int srcOffsetNElems) +{ + ADLASSERT( src->getType() == TYPE_HOST ); + memcpy( dst, src->m_ptr+srcOffsetNElems, nElems*sizeof(T) ); +} + +template +void DeviceHost::copy(Buffer* dst, const T* src, int nElems, int dstOffsetNElems) +{ + ADLASSERT( dst->getType() == TYPE_HOST ); + memcpy( 
dst->m_ptr+dstOffsetNElems, src, nElems*sizeof(T) ); +} + +void DeviceHost::waitForCompletion() const +{ + +} + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Host/AdlStopwatchHost.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Host/AdlStopwatchHost.inl new file mode 100644 index 000000000..bb6eb571c --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Host/AdlStopwatchHost.inl @@ -0,0 +1,119 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + +#ifdef _WIN32 + #include +#else + #include +#endif + +namespace adl +{ + +class StopwatchHost : public StopwatchBase +{ + public: + __inline + StopwatchHost(); + __inline + void init( const Device* deviceData ); + __inline + void start(); + __inline + void split(); + __inline + void stop(); + __inline + float getMs(int index=0); + __inline + void getMs( float* times, int capacity ); + + private: +#ifdef _WIN32 + LARGE_INTEGER m_frequency; + LARGE_INTEGER m_t[CAPACITY]; +#else + struct timeval mStartTime; + timeval m_t[CAPACITY]; +#endif +}; + +__inline +StopwatchHost::StopwatchHost() + : StopwatchBase() +{ +} + +__inline +void StopwatchHost::init( const Device* deviceData ) +{ + m_device = deviceData; +#ifdef _WIN32 + QueryPerformanceFrequency( &m_frequency ); +#else + gettimeofday(&mStartTime, 0); +#endif +} + +__inline +void StopwatchHost::start() +{ + m_idx = 0; +#ifdef _WIN32 + QueryPerformanceCounter(&m_t[m_idx++]); +#else + gettimeofday(&m_t[m_idx++], 0); +#endif +} + +__inline +void StopwatchHost::split() +{ +#ifdef _WIN32 + QueryPerformanceCounter(&m_t[m_idx++]); +#else + gettimeofday(&m_t[m_idx++], 0); +#endif +} + +__inline +void StopwatchHost::stop() +{ + split(); +} + +__inline +float StopwatchHost::getMs(int index) +{ +#ifdef _WIN32 + return (float)(1000*(m_t[index+1].QuadPart - m_t[index].QuadPart))/m_frequency.QuadPart; +#else + return (m_t[index+1].tv_sec - m_t[index].tv_sec) * 1000 + + (m_t[index+1].tv_usec - m_t[index].tv_usec) / 1000; +#endif +} + +__inline +void StopwatchHost::getMs(float* times, int capacity) +{ + for(int i=0; i +#include + +namespace adl +{ + +class CopyBase +{ + public: + enum Option + { + PER_WI_1, + PER_WI_2, + PER_WI_4, + }; +}; + +template +class Copy : public CopyBase +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + struct Data + { + const Device* m_device; + Kernel* m_copy1F4Kernel; + Kernel* m_copy2F4Kernel; + Kernel* m_copy4F4Kernel; + Kernel* m_copyF1Kernel; 
+ Kernel* m_copyF2Kernel; + Buffer* m_constBuffer; + }; + + static + Data* allocate(const Device* deviceData); + + static + void deallocate(Data* data); + + static + void execute( Data* data, Buffer& dst, Buffer& src, int n, Option option = PER_WI_1); + + static + void execute( Data* data, Buffer& dst, Buffer& src, int n); + + static + void execute( Data* data, Buffer& dst, Buffer& src, int n); +}; + + +#include +#include + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/Copy.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/Copy.inl new file mode 100644 index 000000000..ee6d3f0ef --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/Copy.inl @@ -0,0 +1,151 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + + +#define PATH "..\\..\\opencl\\primitives\\AdlPrimitives\\Copy\\CopyKernels" +#define KERNEL0 "Copy1F4Kernel" +#define KERNEL1 "Copy2F4Kernel" +#define KERNEL2 "Copy4F4Kernel" +#define KERNEL3 "CopyF1Kernel" +#define KERNEL4 "CopyF2Kernel" + +#include +#include + + +template +typename Copy::Data* Copy::allocate( const Device* device ) +{ + ADLASSERT( TYPE == device->m_type ); + + + const char* src[] = +#if defined(ADL_LOAD_KERNEL_FROM_STRING) + {copyKernelsCL, copyKernelsDX11}; +// ADLASSERT(0); +#else + {0,0}; +#endif + + Data* data = new Data; + data->m_device = device; + data->m_copy1F4Kernel = device->getKernel( PATH, KERNEL0, 0, src[TYPE] ); + data->m_copy2F4Kernel = device->getKernel( PATH, KERNEL1, 0, src[TYPE] ); + data->m_copy4F4Kernel = device->getKernel( PATH, KERNEL2, 0, src[TYPE] ); + data->m_copyF1Kernel = device->getKernel( PATH, KERNEL3, 0, src[TYPE] ); + data->m_copyF2Kernel = device->getKernel( PATH, KERNEL4, 0, src[TYPE] ); + data->m_constBuffer = new Buffer( device, 1, BufferBase::BUFFER_CONST ); + + return data; +} + +template +void Copy::deallocate( Data* data ) +{ + delete data->m_constBuffer; + delete data; +} + +template +void Copy::execute( Data* data, Buffer& dst, Buffer& src, int n, Option option ) +{ + ADLASSERT( TYPE == dst.getType() ); + ADLASSERT( TYPE == src.getType() ); + + int4 constBuffer; + constBuffer.x = n; + + switch (option) + { + case PER_WI_1: + { + BufferInfo bInfo[] = { BufferInfo( &dst ), BufferInfo( &src, true ) }; + + Launcher launcher( data->m_device, data->m_copy1F4Kernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( n/1 ); + } + break; + case PER_WI_2: + { + ADLASSERT( n%2 == 0 ); + BufferInfo bInfo[] = { BufferInfo( &dst ), BufferInfo( &src, true ) }; + + Launcher launcher( data->m_device, data->m_copy2F4Kernel ); + launcher.setBuffers( bInfo, 
sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( n/2 ); + } + break; + case PER_WI_4: + { + ADLASSERT( n%4 == 0 ); + BufferInfo bInfo[] = { BufferInfo( &dst ), BufferInfo( &src, true ) }; + + Launcher launcher( data->m_device, data->m_copy4F4Kernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( n/4 ); + } + break; + default: + ADLASSERT(0); + break; + }; +} + +template +void Copy::execute( Data* data, Buffer& dst, Buffer& src, int n ) +{ + ADLASSERT( TYPE == dst.getType() ); + ADLASSERT( TYPE == src.getType() ); + + int4 constBuffer; + constBuffer.x = n; + + BufferInfo bInfo[] = { BufferInfo( &dst ), BufferInfo( &src, true ) }; + + Launcher launcher( data->m_device, data->m_copyF2Kernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( n/1 ); +} + +template +void Copy::execute( Data* data, Buffer& dst, Buffer& src, int n ) +{ + ADLASSERT( TYPE == dst.getType() ); + ADLASSERT( TYPE == src.getType() ); + + int4 constBuffer; + constBuffer.x = n; + + BufferInfo bInfo[] = { BufferInfo( &dst ), BufferInfo( &src, true ) }; + + Launcher launcher( data->m_device, data->m_copyF1Kernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( n/1 ); +} + + +#undef PATH +#undef KERNEL0 +#undef KERNEL1 +#undef KERNEL2 +#undef KERNEL3 +#undef KERNEL4 diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/CopyHost.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/CopyHost.inl new file mode 100644 index 000000000..2f8562a29 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/CopyHost.inl @@ -0,0 +1,85 @@ +/* 
+Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +template<> +class Copy : public CopyBase +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + struct Data + { + }; + + static + Data* allocate(const Device* deviceData) + { + ADLASSERT( TYPE_HOST == deviceData->m_type ); + return 0; + } + + static + void deallocate(Data* data) + { + return; + } + + static + void execute( Data* data, Buffer& dst, Buffer& src, int n, Option option = PER_WI_1) + { + ADLASSERT( TYPE_HOST == dst.getType() ); + ADLASSERT( TYPE_HOST == src.getType() ); + + HostBuffer& dstH = (HostBuffer&)dst; + HostBuffer& srcH = (HostBuffer&)src; + + for(int i=0; i& dst, Buffer& src, int n) + { + ADLASSERT( TYPE_HOST == dst.getType() ); + ADLASSERT( TYPE_HOST == src.getType() ); + + HostBuffer& dstH = (HostBuffer&)dst; + HostBuffer& srcH = (HostBuffer&)src; + + for(int i=0; i& dst, Buffer& src, int n) + { + ADLASSERT( TYPE_HOST == dst.getType() ); + ADLASSERT( TYPE_HOST == src.getType() ); + + HostBuffer& dstH = (HostBuffer&)dst; + HostBuffer& srcH = (HostBuffer&)src; + + for(int i=0; i dst : register( u0 ); +StructuredBuffer src : register( t0 ); + 
+[numthreads(WG_SIZE, 1, 1)] +void Copy1F4Kernel( DEFAULT_ARGS ) +{ + int gIdx = GET_GLOBAL_IDX; + + if( gIdx < m_n ) + { + float4 a0 = src[gIdx]; + + dst[ gIdx ] = a0; + } +} + +[numthreads(WG_SIZE, 1, 1)] +void Copy2F4Kernel( DEFAULT_ARGS ) +{ + int gIdx = GET_GLOBAL_IDX; + + if( 2*gIdx <= m_n ) + { + float4 a0 = src[gIdx*2+0]; + float4 a1 = src[gIdx*2+1]; + + dst[ gIdx*2+0 ] = a0; + dst[ gIdx*2+1 ] = a1; + } +} + +[numthreads(WG_SIZE, 1, 1)] +void Copy4F4Kernel( DEFAULT_ARGS ) +{ + int gIdx = GET_GLOBAL_IDX; + + if( 4*gIdx <= m_n ) + { + int idx0 = gIdx*4+0; + int idx1 = gIdx*4+1; + int idx2 = gIdx*4+2; + int idx3 = gIdx*4+3; + + float4 a0 = src[idx0]; + float4 a1 = src[idx1]; + float4 a2 = src[idx2]; + float4 a3 = src[idx3]; + + dst[ idx0 ] = a0; + dst[ idx1 ] = a1; + dst[ idx2 ] = a2; + dst[ idx3 ] = a3; + } +} + +RWStructuredBuffer dstF1 : register( u0 ); +StructuredBuffer srcF1 : register( t0 ); + +[numthreads(WG_SIZE, 1, 1)] +void CopyF1Kernel( DEFAULT_ARGS ) +{ + int gIdx = GET_GLOBAL_IDX; + + if( gIdx < m_n ) + { + float a0 = srcF1[gIdx]; + + dstF1[ gIdx ] = a0; + } + +} + +RWStructuredBuffer dstF2 : register( u0 ); +StructuredBuffer srcF2 : register( t0 ); + +[numthreads(WG_SIZE, 1, 1)] +void CopyF2Kernel( DEFAULT_ARGS ) +{ + int gIdx = GET_GLOBAL_IDX; + + if( gIdx < m_n ) + { + float2 a0 = srcF2[gIdx]; + + dstF2[ gIdx ] = a0; + } +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/CopyKernelsCL.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/CopyKernelsCL.h new file mode 100644 index 000000000..3b6789201 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/CopyKernelsCL.h @@ -0,0 +1,119 @@ +static const char* copyKernelsCL= \ +"/*\n" +" 2011 Takahiro Harada\n" +"*/\n" +"\n" +"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +"\n" +"typedef unsigned int u32;\n" +"#define GET_GROUP_IDX 
get_group_id(0)\n" +"#define GET_LOCAL_IDX get_local_id(0)\n" +"#define GET_GLOBAL_IDX get_global_id(0)\n" +"#define GET_GROUP_SIZE get_local_size(0)\n" +"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +"#define AtomInc(x) atom_inc(&(x))\n" +"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +"\n" +"#define make_uint4 (uint4)\n" +"#define make_uint2 (uint2)\n" +"#define make_int2 (int2)\n" +"\n" +"typedef struct\n" +"{\n" +" int m_n;\n" +" int m_padding[3];\n" +"} ConstBuffer;\n" +"\n" +"\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(64,1,1)))\n" +"void Copy1F4Kernel(__global float4* dst, __global float4* src, \n" +" ConstBuffer cb)\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < cb.m_n )\n" +" {\n" +" float4 a0 = src[gIdx];\n" +"\n" +" dst[ gIdx ] = a0;\n" +" }\n" +"}\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(64,1,1)))\n" +"void Copy2F4Kernel(__global float4* dst, __global float4* src, \n" +" ConstBuffer cb)\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( 2*gIdx <= cb.m_n )\n" +" {\n" +" float4 a0 = src[gIdx*2+0];\n" +" float4 a1 = src[gIdx*2+1];\n" +"\n" +" dst[ gIdx*2+0 ] = a0;\n" +" dst[ gIdx*2+1 ] = a1;\n" +" }\n" +"}\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(64,1,1)))\n" +"void Copy4F4Kernel(__global float4* dst, __global float4* src, \n" +" ConstBuffer cb)\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( 4*gIdx <= cb.m_n )\n" +" {\n" +" int idx0 = gIdx*4+0;\n" +" int idx1 = gIdx*4+1;\n" +" int idx2 = gIdx*4+2;\n" +" int idx3 = gIdx*4+3;\n" +"\n" +" float4 a0 = src[idx0];\n" +" float4 a1 = src[idx1];\n" +" float4 a2 = src[idx2];\n" +" float4 a3 = src[idx3];\n" +"\n" +" dst[ idx0 ] = a0;\n" +" dst[ idx1 ] = a1;\n" +" dst[ idx2 ] = a2;\n" +" dst[ idx3 ] = a3;\n" +" }\n" +"}\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(64,1,1)))\n" +"void CopyF1Kernel(__global float* dstF1, __global float* srcF1, 
\n" +" ConstBuffer cb)\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < cb.m_n )\n" +" {\n" +" float a0 = srcF1[gIdx];\n" +"\n" +" dstF1[ gIdx ] = a0;\n" +" }\n" +"}\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(64,1,1)))\n" +"void CopyF2Kernel(__global float2* dstF2, __global float2* srcF2, \n" +" ConstBuffer cb)\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < cb.m_n )\n" +" {\n" +" float2 a0 = srcF2[gIdx];\n" +"\n" +" dstF2[ gIdx ] = a0;\n" +" }\n" +"}\n" +"\n" +; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/CopyKernelsDX11.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/CopyKernelsDX11.h new file mode 100644 index 000000000..6abcda4a9 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Copy/CopyKernelsDX11.h @@ -0,0 +1,120 @@ +static const char* copyKernelsDX11= \ +"/*\n" +" 2011 Takahiro Harada\n" +"*/\n" +"\n" +"typedef uint u32;\n" +"\n" +"#define GET_GROUP_IDX groupIdx.x\n" +"#define GET_LOCAL_IDX localIdx.x\n" +"#define GET_GLOBAL_IDX globalIdx.x\n" +"#define GROUP_LDS_BARRIER GroupMemoryBarrierWithGroupSync()\n" +"#define GROUP_MEM_FENCE\n" +"#define DEFAULT_ARGS uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID\n" +"#define AtomInc(x) InterlockedAdd(x, 1)\n" +"#define AtomInc1(x, out) InterlockedAdd(x, 1, out)\n" +"\n" +"#define make_uint4 uint4\n" +"#define make_uint2 uint2\n" +"#define make_int2 int2\n" +"\n" +"#define WG_SIZE 64\n" +"\n" +"#define GET_GROUP_SIZE WG_SIZE\n" +"\n" +"\n" +"\n" +"cbuffer CB : register( b0 )\n" +"{\n" +" int m_n;\n" +" int m_padding[3];\n" +"};\n" +"\n" +"RWStructuredBuffer dst : register( u0 );\n" +"StructuredBuffer src : register( t0 );\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void Copy1F4Kernel( DEFAULT_ARGS )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < m_n )\n" +" {\n" +" float4 a0 = src[gIdx];\n" +"\n" +" dst[ gIdx ] = 
a0;\n" +" }\n" +"}\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void Copy2F4Kernel( DEFAULT_ARGS )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( 2*gIdx <= m_n )\n" +" {\n" +" float4 a0 = src[gIdx*2+0];\n" +" float4 a1 = src[gIdx*2+1];\n" +"\n" +" dst[ gIdx*2+0 ] = a0;\n" +" dst[ gIdx*2+1 ] = a1;\n" +" }\n" +"}\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void Copy4F4Kernel( DEFAULT_ARGS )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( 4*gIdx <= m_n )\n" +" {\n" +" int idx0 = gIdx*4+0;\n" +" int idx1 = gIdx*4+1;\n" +" int idx2 = gIdx*4+2;\n" +" int idx3 = gIdx*4+3;\n" +"\n" +" float4 a0 = src[idx0];\n" +" float4 a1 = src[idx1];\n" +" float4 a2 = src[idx2];\n" +" float4 a3 = src[idx3];\n" +"\n" +" dst[ idx0 ] = a0;\n" +" dst[ idx1 ] = a1;\n" +" dst[ idx2 ] = a2;\n" +" dst[ idx3 ] = a3;\n" +" }\n" +"}\n" +"\n" +"RWStructuredBuffer dstF1 : register( u0 );\n" +"StructuredBuffer srcF1 : register( t0 );\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void CopyF1Kernel( DEFAULT_ARGS )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < m_n )\n" +" {\n" +" float a0 = srcF1[gIdx];\n" +"\n" +" dstF1[ gIdx ] = a0;\n" +" }\n" +"\n" +"}\n" +"\n" +"RWStructuredBuffer dstF2 : register( u0 );\n" +"StructuredBuffer srcF2 : register( t0 );\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void CopyF2Kernel( DEFAULT_ARGS )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < m_n )\n" +" {\n" +" float2 a0 = srcF2[gIdx];\n" +"\n" +" dstF2[ gIdx ] = a0;\n" +" }\n" +"}\n" +; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/Fill.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/Fill.h new file mode 100644 index 000000000..35957e83d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/Fill.h @@ -0,0 +1,77 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. 
+In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + +#pragma once + +#include +#include + +namespace adl +{ + +class FillBase +{ + public: + enum Option + { + + }; +}; + +template +class Fill +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + struct ConstData + { + int4 m_data; + int m_offset; + int m_n; + int m_padding[2]; + }; + + struct Data + { + const Device* m_device; + Kernel* m_fillIntKernel; + Kernel* m_fillInt2Kernel; + Kernel* m_fillInt4Kernel; + Buffer* m_constBuffer; + }; + + static + Data* allocate(const Device* deviceData); + + static + void deallocate(Data* data); + + static + void execute(Data* data, Buffer& src, const int& value, int n, int offset = 0); + + static + void execute(Data* data, Buffer& src, const int2& value, int n, int offset = 0); + + static + void execute(Data* data, Buffer& src, const int4& value, int n, int offset = 0); + +}; + + +#include +#include + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/Fill.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/Fill.inl new file mode 100644 index 000000000..913db9b66 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/Fill.inl @@ -0,0 +1,123 @@ +/* +Copyright (c) 2012 
Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +//#define PATH "..\\..\\AdlPrimitives\\Fill\\FillKernels" +#define PATH "..\\..\\opencl\\primitives\\AdlPrimitives\\Fill\\FillKernels" +#define KERNEL0 "FillIntKernel" +#define KERNEL1 "FillInt2Kernel" +#define KERNEL2 "FillInt4Kernel" + +#include +#include + + +template +typename Fill::Data* Fill::allocate( const Device* device ) +{ + ADLASSERT( TYPE == device->m_type ); + + const char* src[] = +#if defined(ADL_LOAD_KERNEL_FROM_STRING) + {fillKernelsCL, fillKernelsDX11}; +#else + {0,0}; +#endif + + Data* data = new Data; + data->m_device = device; + data->m_fillIntKernel = device->getKernel( PATH, KERNEL0, 0, src[TYPE] ); + data->m_fillInt2Kernel = device->getKernel( PATH, KERNEL1, 0, src[TYPE] ); + data->m_fillInt4Kernel = device->getKernel( PATH, KERNEL2, 0, src[TYPE] ); + data->m_constBuffer = new Buffer( device, 1, BufferBase::BUFFER_CONST ); + + return data; +} + +template +void Fill::deallocate( Data* data ) +{ + delete data->m_constBuffer; + delete data; +} + +template +void Fill::execute(Data* data, Buffer& src, const int& value, int n, int offset) +{ + ADLASSERT( n>0 ); + 
ConstData constBuffer; + { + constBuffer.m_offset = offset; + constBuffer.m_n = n; + constBuffer.m_data = make_int4( value ); + } + + { + BufferInfo bInfo[] = { BufferInfo( &src ) }; + + Launcher launcher( data->m_device, data->m_fillIntKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( n ); + } +} + +template +void Fill::execute(Data* data, Buffer& src, const int2& value, int n, int offset) +{ + ADLASSERT( n>0 ); + ConstData constBuffer; + { + constBuffer.m_offset = offset; + constBuffer.m_n = n; + constBuffer.m_data = make_int4( value.x, value.y, 0, 0 ); + } + + { + BufferInfo bInfo[] = { BufferInfo( &src ) }; + + Launcher launcher( data->m_device, data->m_fillInt2Kernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( n ); + } +} + +template +void Fill::execute(Data* data, Buffer& src, const int4& value, int n, int offset) +{ + ADLASSERT( n>0 ); + ConstData constBuffer; + { + constBuffer.m_offset = offset; + constBuffer.m_n = n; + constBuffer.m_data = value; + } + + { + BufferInfo bInfo[] = { BufferInfo( &src ) }; + + Launcher launcher( data->m_device, data->m_fillInt4Kernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( n ); + } +} + +#undef PATH +#undef KERNEL0 +#undef KERNEL1 +#undef KERNEL2 + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillHost.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillHost.inl new file mode 100644 index 000000000..c6205fa98 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillHost.inl @@ -0,0 +1,99 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. 
+ +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + +template<> +class Fill +{ + public: + struct Data + { + }; + + static + Data* allocate(const Device* deviceData) + { + return 0; + } + + static + void deallocate(Data* data) + { + + } + + template + static + void executeImpl(Data* data, Buffer& src, const T& value, int n, int offset = 0) + { + ADLASSERT( src.getType() == TYPE_HOST ); + ADLASSERT( src.m_size >= offset+n ); + HostBuffer& hSrc = (HostBuffer&)src; + + for(int idx=offset; idx& src, const int& value, int n, int offset = 0) + { + executeImpl( data, src, value, n, offset ); + } + + static + void execute(Data* data, Buffer& src, const int2& value, int n, int offset = 0) + { + executeImpl( data, src, value, n, offset ); + } + + static + void execute(Data* data, Buffer& src, const int4& value, int n, int offset = 0) + { + executeImpl( data, src, value, n, offset ); + } + +/* + static + void execute(Data* data, Buffer& src, int value, int n, int offset = 0) + { + ADLASSERT( src.getType() == TYPE_HOST ); + ADLASSERT( src.m_size <= offset+n ); + HostBuffer& hSrc = (HostBuffer&)src; + + for(int idx=offset; idx& src, const int2& value, int n, int offset = 0) + { + 
ADLASSERT( src.getType() == TYPE_HOST ); + ADLASSERT( src.m_size <= offset+n ); + + } + + static + void execute(Data* data, Buffer& src, const int4& value, int n, int offset = 0) + { + ADLASSERT( src.getType() == TYPE_HOST ); + ADLASSERT( src.m_size <= offset+n ); + + } +*/ +}; + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernels.cl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernels.cl new file mode 100644 index 000000000..11a31b0c5 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernels.cl @@ -0,0 +1,81 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable + +typedef unsigned int u32; +#define GET_GROUP_IDX get_group_id(0) +#define GET_LOCAL_IDX get_local_id(0) +#define GET_GLOBAL_IDX get_global_id(0) +#define GET_GROUP_SIZE get_local_size(0) +#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) +#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) +#define AtomInc(x) atom_inc(&(x)) +#define AtomInc1(x, out) out = atom_inc(&(x)) + +#define make_uint4 (uint4) +#define make_uint2 (uint2) +#define make_int2 (int2) + +typedef struct +{ + int4 m_data; + int m_offset; + int m_n; + int m_padding[2]; +} ConstBuffer; + + +__kernel +__attribute__((reqd_work_group_size(64,1,1))) +void FillIntKernel(__global int* dstInt, + ConstBuffer cb) +{ + int gIdx = GET_GLOBAL_IDX; + + if( gIdx < cb.m_n ) + { + dstInt[ cb.m_offset+gIdx ] = cb.m_data.x; + } +} + +__kernel +__attribute__((reqd_work_group_size(64,1,1))) +void FillInt2Kernel(__global int2* dstInt2, + ConstBuffer cb) +{ + int gIdx = GET_GLOBAL_IDX; + + if( gIdx < cb.m_n ) + { + dstInt2[ cb.m_offset+gIdx ] = make_int2( cb.m_data.x, cb.m_data.y ); + } +} + +__kernel +__attribute__((reqd_work_group_size(64,1,1))) +void FillInt4Kernel(__global int4* dstInt4, + ConstBuffer cb) +{ + int gIdx = GET_GLOBAL_IDX; + + if( gIdx < cb.m_n ) + { + dstInt4[ cb.m_offset+gIdx ] = cb.m_data; + } +} + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernels.hlsl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernels.hlsl new file mode 100644 index 000000000..ead907d5e --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernels.hlsl @@ -0,0 +1,79 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. 
+In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +typedef uint u32; + +#define GET_GROUP_IDX groupIdx.x +#define GET_LOCAL_IDX localIdx.x +#define GET_GLOBAL_IDX globalIdx.x +#define GROUP_LDS_BARRIER GroupMemoryBarrierWithGroupSync() +#define GROUP_MEM_FENCE +#define DEFAULT_ARGS uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID +#define AtomInc(x) InterlockedAdd(x, 1) +#define AtomInc1(x, out) InterlockedAdd(x, 1, out) + +#define make_uint4 uint4 +#define make_uint2 uint2 +#define make_int2 int2 + + +cbuffer CB : register( b0 ) +{ + int4 m_data; + int m_offset; + int m_n; + int m_padding[2]; +}; + + +RWStructuredBuffer dstInt : register( u0 ); + +[numthreads(64, 1, 1)] +void FillIntKernel( DEFAULT_ARGS ) +{ + int gIdx = GET_GLOBAL_IDX; + + if( gIdx < m_n ) + { + dstInt[ m_offset+gIdx ] = m_data.x; + } +} + +RWStructuredBuffer dstInt2 : register( u0 ); + +[numthreads(64, 1, 1)] +void FillInt2Kernel( DEFAULT_ARGS ) +{ + int gIdx = GET_GLOBAL_IDX; + + if( gIdx < m_n ) + { + dstInt2[ m_offset+gIdx ] = make_int2( m_data.x, m_data.y ); + } +} + +RWStructuredBuffer dstInt4 : register( u0 ); + +[numthreads(64, 1, 1)] +void FillInt4Kernel( DEFAULT_ARGS ) +{ + int gIdx = GET_GLOBAL_IDX; + + if( gIdx 
< m_n ) + { + dstInt4[ m_offset+gIdx ] = m_data; + } +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernelsCL.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernelsCL.h new file mode 100644 index 000000000..e2899ffbc --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernelsCL.h @@ -0,0 +1,71 @@ +static const char* fillKernelsCL= \ +"/*\n" +" 2011 Takahiro Harada\n" +"*/\n" +"\n" +"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +"\n" +"typedef unsigned int u32;\n" +"#define GET_GROUP_IDX get_group_id(0)\n" +"#define GET_LOCAL_IDX get_local_id(0)\n" +"#define GET_GLOBAL_IDX get_global_id(0)\n" +"#define GET_GROUP_SIZE get_local_size(0)\n" +"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +"#define AtomInc(x) atom_inc(&(x))\n" +"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +"\n" +"#define make_uint4 (uint4)\n" +"#define make_uint2 (uint2)\n" +"#define make_int2 (int2)\n" +"\n" +"typedef struct\n" +"{\n" +" int4 m_data;\n" +" int m_offset;\n" +" int m_n;\n" +" int m_padding[2];\n" +"} ConstBuffer;\n" +"\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(64,1,1)))\n" +"void FillIntKernel(__global int* dstInt, \n" +" ConstBuffer cb)\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < cb.m_n )\n" +" {\n" +" dstInt[ cb.m_offset+gIdx ] = cb.m_data.x;\n" +" }\n" +"}\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(64,1,1)))\n" +"void FillInt2Kernel(__global int2* dstInt2, \n" +" ConstBuffer cb)\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < cb.m_n )\n" +" {\n" +" dstInt2[ cb.m_offset+gIdx ] = make_int2( cb.m_data.x, cb.m_data.y );\n" +" }\n" +"}\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(64,1,1)))\n" +"void FillInt4Kernel(__global int4* dstInt4, \n" +" 
ConstBuffer cb)\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < cb.m_n )\n" +" {\n" +" dstInt4[ cb.m_offset+gIdx ] = cb.m_data;\n" +" }\n" +"}\n" +"\n" +; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernelsDX11.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernelsDX11.h new file mode 100644 index 000000000..1cdc6ab61 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernelsDX11.h @@ -0,0 +1,69 @@ +static const char* fillKernelsDX11= \ +"/*\n" +" 2011 Takahiro Harada\n" +"*/\n" +"\n" +"typedef uint u32;\n" +"\n" +"#define GET_GROUP_IDX groupIdx.x\n" +"#define GET_LOCAL_IDX localIdx.x\n" +"#define GET_GLOBAL_IDX globalIdx.x\n" +"#define GROUP_LDS_BARRIER GroupMemoryBarrierWithGroupSync()\n" +"#define GROUP_MEM_FENCE\n" +"#define DEFAULT_ARGS uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID\n" +"#define AtomInc(x) InterlockedAdd(x, 1)\n" +"#define AtomInc1(x, out) InterlockedAdd(x, 1, out)\n" +"\n" +"#define make_uint4 uint4\n" +"#define make_uint2 uint2\n" +"#define make_int2 int2\n" +"\n" +"\n" +"cbuffer CB : register( b0 )\n" +"{\n" +" int4 m_data;\n" +" int m_offset;\n" +" int m_n;\n" +" int m_padding[2];\n" +"};\n" +"\n" +"\n" +"RWStructuredBuffer dstInt : register( u0 );\n" +"\n" +"[numthreads(64, 1, 1)]\n" +"void FillIntKernel( DEFAULT_ARGS )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < m_n )\n" +" {\n" +" dstInt[ m_offset+gIdx ] = m_data.x;\n" +" }\n" +"}\n" +"\n" +"RWStructuredBuffer dstInt2 : register( u0 );\n" +"\n" +"[numthreads(64, 1, 1)]\n" +"void FillInt2Kernel( DEFAULT_ARGS )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < m_n )\n" +" {\n" +" dstInt2[ m_offset+gIdx ] = make_int2( m_data.x, m_data.y );\n" +" }\n" +"}\n" +"\n" +"RWStructuredBuffer dstInt4 : register( u0 );\n" +"\n" +"[numthreads(64, 1, 1)]\n" +"void FillInt4Kernel( DEFAULT_ARGS )\n" 
+"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +"\n" +" if( gIdx < m_n )\n" +" {\n" +" dstInt4[ m_offset+gIdx ] = m_data;\n" +" }\n" +"}\n" +; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Array.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Array.h new file mode 100644 index 000000000..5a63eee38 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Array.h @@ -0,0 +1,231 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + +#ifndef ARRAY_H +#define ARRAY_H + +#include +#include +#include +#include + +namespace adl +{ + +template +class Array +{ + public: + __inline + Array(); + __inline + Array(int size); + __inline + ~Array(); + __inline + T& operator[] (int idx); + __inline + const T& operator[] (int idx) const; + __inline + void pushBack(const T& elem); + __inline + void popBack(); + __inline + void clear(); + __inline + void setSize(int size); + __inline + int getSize() const; + __inline + T* begin(); + __inline + const T* begin() const; + __inline + T* end(); + __inline + const T* end() const; + __inline + int indexOf(const T& data) const; + __inline + void removeAt(int idx); + __inline + T& expandOne(); + + private: + Array(const Array& a){} + + private: + enum + { + DEFAULT_SIZE = 128, + INCREASE_SIZE = 128, + }; + + T* m_data; + int m_size; + int m_capacity; +}; + +template +Array::Array() +{ + m_size = 0; + m_capacity = DEFAULT_SIZE; +// m_data = new T[ m_capacity ]; + m_data = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16); + for(int i=0; i +Array::Array(int size) +{ + m_size = size; + m_capacity = size; +// m_data = new T[ m_capacity ]; + m_data = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16); + for(int i=0; i +Array::~Array() +{ + if( m_data ) + { +// delete [] m_data; + _aligned_free( m_data ); + m_data = NULL; + } +} + +template +T& Array::operator[](int idx) +{ + ADLASSERT(idx +const T& Array::operator[](int idx) const +{ + ADLASSERT(idx +void Array::pushBack(const T& elem) +{ + if( m_size == m_capacity ) + { + int oldCap = m_capacity; + m_capacity += INCREASE_SIZE; +// T* s = new T[m_capacity]; + T* s = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16); + memcpy( s, m_data, sizeof(T)*oldCap ); +// delete [] m_data; + _aligned_free( m_data ); + m_data = s; + } + m_data[ m_size++ ] = elem; +} + +template +void Array::popBack() +{ + ADLASSERT( m_size>0 ); + m_size--; +} + +template +void Array::clear() +{ + m_size = 0; +} + 
+template +void Array::setSize(int size) +{ + if( size > m_capacity ) + { + int oldCap = m_capacity; + m_capacity = size; +// T* s = new T[m_capacity]; + T* s = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16); + for(int i=0; i +int Array::getSize() const +{ + return m_size; +} + +template +const T* Array::begin() const +{ + return m_data; +} + +template +T* Array::begin() +{ + return m_data; +} + +template +T* Array::end() +{ + return m_data+m_size; +} + +template +const T* Array::end() const +{ + return m_data+m_size; +} + +template +int Array::indexOf(const T& data) const +{ + for(int i=0; i +void Array::removeAt(int idx) +{ + ADLASSERT(idx +T& Array::expandOne() +{ + setSize( m_size+1 ); + return m_data[ m_size-1 ]; +} + +}; + +#endif + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Float2.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Float2.inl new file mode 100644 index 000000000..4b2a9e7f7 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Float2.inl @@ -0,0 +1,173 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +__inline +float2 make_float2(float x, float y) +{ + float2 v; + v.s[0] = x; v.s[1] = y; + return v; +} + +__inline +float2 make_float2(float x) +{ + return make_float2(x,x); +} + +__inline +float2 make_float2(const int2& x) +{ + return make_float2((float)x.s[0], (float)x.s[1]); +} + + + + +__inline +float2 operator-(const float2& a) +{ + return make_float2(-a.x, -a.y); +} + +__inline +float2 operator*(const float2& a, const float2& b) +{ + float2 out; + out.s[0] = a.s[0]*b.s[0]; + out.s[1] = a.s[1]*b.s[1]; + return out; +} + +__inline +float2 operator*(float a, const float2& b) +{ + return make_float2(a*b.s[0], a*b.s[1]); +} + +__inline +float2 operator*(const float2& b, float a) +{ + return make_float2(a*b.s[0], a*b.s[1]); +} + +__inline +void operator*=(float2& a, const float2& b) +{ + a.s[0]*=b.s[0]; + a.s[1]*=b.s[1]; +} + +__inline +void operator*=(float2& a, float b) +{ + a.s[0]*=b; + a.s[1]*=b; +} + +__inline +float2 operator/(const float2& a, const float2& b) +{ + float2 out; + out.s[0] = a.s[0]/b.s[0]; + out.s[1] = a.s[1]/b.s[1]; + return out; +} + +__inline +float2 operator/(const float2& b, float a) +{ + return make_float2(b.s[0]/a, b.s[1]/a); +} + +__inline +void operator/=(float2& a, const float2& b) +{ + a.s[0]/=b.s[0]; + a.s[1]/=b.s[1]; +} + +__inline +void operator/=(float2& a, float b) +{ + a.s[0]/=b; + a.s[1]/=b; +} +// + +__inline +float2 operator+(const float2& a, const float2& b) +{ + float2 out; + out.s[0] = a.s[0]+b.s[0]; + out.s[1] = a.s[1]+b.s[1]; + return out; +} + +__inline +float2 operator+(const float2& a, float b) +{ + float2 out; + out.s[0] = a.s[0]+b; + out.s[1] = a.s[1]+b; + return out; +} + +__inline +float2 operator-(const float2& a, const float2& b) +{ + float2 out; + out.s[0] = a.s[0]-b.s[0]; + out.s[1] = a.s[1]-b.s[1]; + return out; +} + +__inline +float2 operator-(const float2& a, float b) +{ + float2 out; + out.s[0] = a.s[0]-b; + out.s[1] = a.s[1]-b; + return out; +} + 
+__inline +void operator+=(float2& a, const float2& b) +{ + a.s[0]+=b.s[0]; + a.s[1]+=b.s[1]; +} + +__inline +void operator+=(float2& a, float b) +{ + a.s[0]+=b; + a.s[1]+=b; +} + +__inline +void operator-=(float2& a, const float2& b) +{ + a.s[0]-=b.s[0]; + a.s[1]-=b.s[1]; +} + +__inline +void operator-=(float2& a, float b) +{ + a.s[0]-=b; + a.s[1]-=b; +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Float4.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Float4.inl new file mode 100644 index 000000000..458a91f65 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Float4.inl @@ -0,0 +1,375 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +//#define CHECK_ALIGNMENT(a) ADLASSERT((u32(&(a)) & 0xf) == 0); +#define CHECK_ALIGNMENT(a) a; + + +__inline +float4 make_float4(float x, float y, float z, float w = 0.f) +{ + float4 v; + v.x = x; v.y = y; v.z = z; v.w = w; + return v; +} + +__inline +float4 make_float4(float x) +{ + return make_float4(x,x,x,x); +} + +__inline +float4 make_float4(const int4& x) +{ + return make_float4((float)x.s[0], (float)x.s[1], (float)x.s[2], (float)x.s[3]); +} + +__inline +int4 make_int4(int x, int y, int z, int w = 0) +{ + int4 v; + v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w; + return v; +} + +__inline +int4 make_int4(int x) +{ + return make_int4(x,x,x,x); +} + +__inline +int4 make_int4(const float4& x) +{ + return make_int4((int)x.x, (int)x.y, (int)x.z, (int)x.w); +} + +__inline +int2 make_int2(int a, int b) +{ + int2 ans; ans.x = a; ans.y = b; + return ans; +} + +__inline +bool operator ==(const int2& a, const int2& b) +{ + return a.x==b.x && a.y==b.y; +} + +__inline +bool operator ==(const int4& a, const int4& b) +{ + return a.x==b.x && a.y==b.y && a.z==b.z && a.w==b.w; +} + +__inline +bool operator ==(const float2& a, const float2& b) +{ + return a.x==b.x && a.y==b.y; +} + +__inline +bool operator ==(const float4& a, const float4& b) +{ + return a.x==b.x && a.y==b.y && a.z==b.z && a.w==b.w; +} + +__inline +float4 operator-(const float4& a) +{ + return make_float4(-a.x, -a.y, -a.z, -a.w); +} + +__inline +float4 operator*(const float4& a, const float4& b) +{ +// ADLASSERT((u32(&a) & 0xf) == 0); + + float4 out; + out.s[0] = a.s[0]*b.s[0]; + out.s[1] = a.s[1]*b.s[1]; + out.s[2] = a.s[2]*b.s[2]; + out.s[3] = a.s[3]*b.s[3]; + return out; +} + +__inline +float4 operator*(float a, const float4& b) +{ + return make_float4(a*b.s[0], a*b.s[1], a*b.s[2], a*b.s[3]); +} + +__inline +float4 operator*(const float4& b, float a) +{ + CHECK_ALIGNMENT(b); + + return make_float4(a*b.s[0], a*b.s[1], a*b.s[2], a*b.s[3]); +} + +__inline +void 
operator*=(float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]*=b.s[0]; + a.s[1]*=b.s[1]; + a.s[2]*=b.s[2]; + a.s[3]*=b.s[3]; +} + +__inline +void operator*=(float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]*=b; + a.s[1]*=b; + a.s[2]*=b; + a.s[3]*=b; +} +/* +__inline +bool operator ==(const float4& a, const float4& b) +{ + + +} +*/ +// +__inline +float4 operator/(const float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.s[0] = a.s[0]/b.s[0]; + out.s[1] = a.s[1]/b.s[1]; + out.s[2] = a.s[2]/b.s[2]; + out.s[3] = a.s[3]/b.s[3]; + return out; +} + +__inline +float4 operator/(const float4& b, float a) +{ + CHECK_ALIGNMENT(b); + + return make_float4(b.s[0]/a, b.s[1]/a, b.s[2]/a, b.s[3]/a); +} + +__inline +void operator/=(float4& a, const float4& b) +{ + a.s[0]/=b.s[0]; + a.s[1]/=b.s[1]; + a.s[2]/=b.s[2]; + a.s[3]/=b.s[3]; +} + +__inline +void operator/=(float4& a, float b) +{ + ADLASSERT((u32(&a) & 0xf) == 0); + + a.s[0]/=b; + a.s[1]/=b; + a.s[2]/=b; + a.s[3]/=b; +} +// + +__inline +float4 operator+(const float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.s[0] = a.s[0]+b.s[0]; + out.s[1] = a.s[1]+b.s[1]; + out.s[2] = a.s[2]+b.s[2]; + out.s[3] = a.s[3]+b.s[3]; + return out; +} + +__inline +float4 operator+(const float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.s[0] = a.s[0]+b; + out.s[1] = a.s[1]+b; + out.s[2] = a.s[2]+b; + out.s[3] = a.s[3]+b; + return out; +} + +__inline +float4 operator-(const float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.s[0] = a.s[0]-b.s[0]; + out.s[1] = a.s[1]-b.s[1]; + out.s[2] = a.s[2]-b.s[2]; + out.s[3] = a.s[3]-b.s[3]; + return out; +} + +__inline +float4 operator-(const float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + float4 out; + out.s[0] = a.s[0]-b; + out.s[1] = a.s[1]-b; + out.s[2] = a.s[2]-b; + out.s[3] = a.s[3]-b; + return out; +} + +__inline +void operator+=(float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + 
a.s[0]+=b.s[0]; + a.s[1]+=b.s[1]; + a.s[2]+=b.s[2]; + a.s[3]+=b.s[3]; +} + +__inline +void operator+=(float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]+=b; + a.s[1]+=b; + a.s[2]+=b; + a.s[3]+=b; +} + +__inline +void operator-=(float4& a, const float4& b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]-=b.s[0]; + a.s[1]-=b.s[1]; + a.s[2]-=b.s[2]; + a.s[3]-=b.s[3]; +} + +__inline +void operator-=(float4& a, float b) +{ + CHECK_ALIGNMENT(a); + + a.s[0]-=b; + a.s[1]-=b; + a.s[2]-=b; + a.s[3]-=b; +} + + + + + +__inline +float4 cross3(const float4& a, const float4& b) +{ + return make_float4(a.s[1]*b.s[2]-a.s[2]*b.s[1], + a.s[2]*b.s[0]-a.s[0]*b.s[2], + a.s[0]*b.s[1]-a.s[1]*b.s[0], + 0); +} + +__inline +float dot3F4(const float4& a, const float4& b) +{ + return a.x*b.x+a.y*b.y+a.z*b.z; +} + +__inline +float length3(const float4& a) +{ + return sqrtf(dot3F4(a,a)); +} + +__inline +float dot4(const float4& a, const float4& b) +{ + return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w; +} + +// for height +__inline +float dot3w1(const float4& point, const float4& eqn) +{ + return point.x*eqn.x+point.y*eqn.y+point.z*eqn.z+eqn.w; +} + +__inline +float4 normalize3(const float4& a) +{ + float length = sqrtf(dot3F4(a, a)); + return 1.f/length * a; +} + +__inline +float4 normalize4(const float4& a) +{ + float length = sqrtf(dot4(a, a)); + return 1.f/length * a; +} + +__inline +float4 createEquation(const float4& a, const float4& b, const float4& c) +{ + float4 eqn; + float4 ab = b-a; + float4 ac = c-a; + eqn = normalize3( cross3(ab, ac) ); + eqn.w = -dot3F4(eqn,a); + return eqn; +} + +__inline +float intersectPlaneLine( const float4& planeEqn, const float4& vec, const float4& orig ) +{ + return (-planeEqn.w - dot3F4(planeEqn, orig))/dot3F4(planeEqn, vec); +} + +template<> +__inline +float4 max2(const float4& a, const float4& b) +{ + return make_float4( max2(a.x,b.x), max2(a.y,b.y), max2(a.z,b.z), max2(a.w,b.w) ); +} + +template<> +__inline +float4 min2(const float4& a, const float4& b) +{ + return 
make_float4( min2(a.x,b.x), min2(a.y,b.y), min2(a.z,b.z), min2(a.w,b.w) ); +} + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Math.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Math.h new file mode 100644 index 000000000..0126e7289 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Math.h @@ -0,0 +1,224 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#ifndef CL_MATH_H +#define CL_MATH_H + +#include +#include +#include +#include + + +#include + +#include +#define pxSort std::sort + +#define PI 3.14159265358979323846f +#define NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment)) + + +#define _MEM_CLASSALIGN16 __declspec(align(16)) +#define _MEM_ALIGNED_ALLOCATOR16 void* operator new(size_t size) { return _aligned_malloc( size, 16 ); } \ + void operator delete(void *p) { _aligned_free( p ); } \ + void* operator new[](size_t size) { return _aligned_malloc( size, 16 ); } \ + void operator delete[](void *p) { _aligned_free( p ); } \ + void* operator new(size_t size, void* p) { return p; } \ + void operator delete(void *p, void* pp) {} + +namespace adl +{ + +template +T nextPowerOf2(T n) +{ + n -= 1; + for(int i=0; i>i); + return n+1; +} + +typedef unsigned int u32; +typedef unsigned short u16; +typedef unsigned char u8; + +_MEM_CLASSALIGN16 +struct float4 +{ + _MEM_ALIGNED_ALLOCATOR16; + union + { + struct + { + float x,y,z,w; + }; + struct + { + float s[4]; + }; + __m128 m_quad; + }; +}; + +_MEM_CLASSALIGN16 +struct int4 +{ + _MEM_ALIGNED_ALLOCATOR16; + union + { + struct + { + int x,y,z,w; + }; + struct + { + int s[4]; + }; + }; +}; + +_MEM_CLASSALIGN16 +struct uint4 +{ + _MEM_ALIGNED_ALLOCATOR16; + union + { + struct + { + u32 x,y,z,w; + }; + struct + { + u32 s[4]; + }; + }; +}; + +struct int2 +{ + union + { + struct + { + int x,y; + }; + struct + { + int s[2]; + }; + }; +}; + +struct float2 +{ + union + { + struct + { + float x,y; + }; + struct + { + float s[2]; + }; + }; +}; + +template +__inline +T max2(const T& a, const T& b) +{ + return (a>b)? 
a:b; +} + +template +__inline +T min2(const T& a, const T& b) +{ + return (a +#include + + +template +void swap2(T& a, T& b) +{ + T tmp = a; + a = b; + b = tmp; +} + + +__inline +void seedRandom(int seed) +{ + srand( seed ); +} + +template +__inline +T getRandom(const T& minV, const T& maxV) +{ + float r = (rand()%10000)/10000.f; + T range = maxV - minV; + return (T)(minV + r*range); +} + +template<> +__inline +float4 getRandom(const float4& minV, const float4& maxV) +{ + float4 r = make_float4( (rand()%10000)/10000.f, (rand()%10000)/10000.f, (rand()%10000)/10000.f, (rand()%10000)/10000.f ); + float4 range = maxV - minV; + return (minV + r*range); +} + + + +template +T* addByteOffset(void* baseAddr, u32 offset) +{ + return (T*)(((u32)baseAddr)+offset); +} + + +struct Pair32 +{ + Pair32(){} + Pair32(u32 a, u32 b) : m_a(a), m_b(b){} + + u32 m_a; + u32 m_b; +}; + +struct PtrPair +{ + PtrPair(){} + PtrPair(void* a, void* b) : m_a(a), m_b(b){} + template + PtrPair(T* a, T* b) : m_a((void*)a), m_b((void*)b){} + + void* m_a; + void* m_b; +}; + +}; + +#endif diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/MathCL.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/MathCL.h new file mode 100644 index 000000000..6e36881ef --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/MathCL.h @@ -0,0 +1,357 @@ + +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+//Originally written by Takahiro Harada
+
+
+#pragma OPENCL EXTENSION cl_amd_printf : enable
+#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
+#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
+
+typedef unsigned int u32;
+typedef unsigned short u16;
+typedef unsigned char u8;
+
+// Work-item / work-group queries, all along dimension 0.
+#define GET_GROUP_IDX get_group_id(0)
+#define GET_LOCAL_IDX get_local_id(0)
+#define GET_GLOBAL_IDX get_global_id(0)
+#define GET_GROUP_SIZE get_local_size(0)
+#define GET_NUM_GROUPS get_num_groups(0)
+// Local-memory barrier and fence.
+#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
+#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
+// Atomic wrappers. The Atom* macros take an lvalue and pass its address;
+// AppendInc passes x to atomic_inc unchanged. The *1 / Append variants
+// store the value returned by the atomic in 'out'.
+#define AtomInc(x) atom_inc(&(x))
+#define AtomInc1(x, out) out = atom_inc(&(x))
+#define AppendInc(x, out) out = atomic_inc(x)
+#define AtomAdd(x, value) atom_add(&(x), value)
+#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
+#define AtomXhg(x, value) atom_xchg ( &(x), value )
+
+
+#define SELECT_UINT4( b, a, condition ) select( b,a,condition )
+
+// make_xxx(...) expands to an OpenCL vector literal, (xxx)(...).
+#define make_float4 (float4)
+#define make_float2 (float2)
+#define make_uint4 (uint4)
+#define make_int4 (int4)
+#define make_uint2 (uint2)
+#define make_int2 (int2)
+
+
+#define max2 max
+#define min2 min
+
+
+///////////////////////////////////////
+//	Vector
+///////////////////////////////////////
+// Scalar division via native_divide.
+__inline
+float fastDiv(float numerator, float denominator)
+{
+	return native_divide(numerator, denominator);
+//	return numerator/denominator;
+}
+
+// Component-wise float4 division via native_divide.
+__inline
+float4 fastDiv4(float4 numerator, float4 denominator)
+{
+	return native_divide(numerator, denominator);
+}
+
+// Square root via native_sqrt.
+__inline
+float fastSqrtf(float f2)
+{
+	return native_sqrt(f2);
+//	return sqrt(f2);
+}
+
+// Reciprocal square root via native_rsqrt.
+__inline
+float fastRSqrt(float f2)
+{
+	return native_rsqrt(f2);
+}
+
+// Length of all four lanes via fast_length.
+__inline
+float fastLength4(float4 v)
+{
+	return fast_length(v);
+}
+
+// Normalizes all four lanes via fast_normalize.
+__inline
+float4 fastNormalize4(float4 v)
+{
+	return fast_normalize(v);
+}
+
+
+// Provides the C name sqrtf on top of native_sqrt.
+__inline
+float sqrtf(float a)
+{
+//	return sqrt(a);
+	return native_sqrt(a);
+}
+
+// Cross product, forwarded to the built-in cross().
+__inline
+float4 cross3(float4 a, float4 b)
+{
+	return cross(a,b);
+}
+
+// Dot product of the xyz lanes; w is replaced by 0 on both inputs first.
+__inline
+float dot3F4(float4 a, float4 b)
+{
+	float4 a1 = make_float4(a.xyz,0.f);
+	float4 b1 = make_float4(b.xyz,0.f);
+	return dot(a1, b1);
+}
+
+// Length of the xyz part.
+__inline
+float length3(const float4 a)
+{
+	return sqrtf(dot3F4(a,a));
+}
+
+// Dot product over all four lanes.
+__inline
+float dot4(const float4 a, const float4 b)
+{
+	return dot( a, b );
+}
+
+// for height
+// Evaluates eqn.xyz . point.xyz + eqn.w, i.e. point.w is treated as 1.
+__inline
+float dot3w1(const float4 point, const float4 eqn)
+{
+	return dot3F4(point,eqn) + eqn.w;
+}
+
+// Normalizes the xyz part; the input's w is dropped (set to 0) beforehand.
+__inline
+float4 normalize3(const float4 a)
+{
+	float4 n = make_float4(a.x, a.y, a.z, 0.f);
+	return fastNormalize4( n );
+//	float length = sqrtf(dot3F4(a, a));
+//	return 1.f/length * a;
+}
+
+// Scales a by 1/length(a) using all four lanes.
+__inline
+float4 normalize4(const float4 a)
+{
+	float length = sqrtf(dot4(a, a));
+	return 1.f/length * a;
+}
+
+// Builds (n.x, n.y, n.z, d) from three points, where
+// n = normalize3(cross3(b-a, c-a)) and d = -dot3F4(n, a).
+__inline
+float4 createEquation(const float4 a, const float4 b, const float4 c)
+{
+	float4 eqn;
+	float4 ab = b-a;
+	float4 ac = c-a;
+	eqn = normalize3( cross3(ab, ac) );
+	eqn.w = -dot3F4(eqn,a);
+	return eqn;
+}
+
+///////////////////////////////////////
+//	Matrix3x3
+///////////////////////////////////////
+
+// 3x3 matrix stored as three float4 rows; the functions below keep the
+// w lane of every row at 0.
+typedef struct
+{
+	float4 m_row[3];
+}Matrix3x3;
+
+__inline
+Matrix3x3 mtZero();
+
+__inline
+Matrix3x3 mtIdentity();
+
+__inline
+Matrix3x3 mtTranspose(Matrix3x3 m);
+
+__inline
+Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);
+
+__inline
+float4 mtMul1(Matrix3x3 a, float4 b);
+
+__inline
+float4 mtMul3(float4 a, Matrix3x3 b);
+
+// All-zero matrix.
+__inline
+Matrix3x3 mtZero()
+{
+	Matrix3x3 m;
+	m.m_row[0] = (float4)(0.f);
+	m.m_row[1] = (float4)(0.f);
+	m.m_row[2] = (float4)(0.f);
+	return m;
+}
+
+// Identity matrix.
+__inline
+Matrix3x3 mtIdentity()
+{
+	Matrix3x3 m;
+	m.m_row[0] = (float4)(1,0,0,0);
+	m.m_row[1] = (float4)(0,1,0,0);
+	m.m_row[2] = (float4)(0,0,1,0);
+	return m;
+}
+
+// Transpose of the 3x3 part; w of each output row is 0.
+__inline
+Matrix3x3 mtTranspose(Matrix3x3 m)
+{
+	Matrix3x3 out;
+	out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);
+	out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);
+	out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);
+	return out;
+}
+
+// Matrix product a*b, computed as dot products of a's rows with the rows
+// of b transposed (i.e. b's columns).
+__inline
+Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)
+{
+	Matrix3x3 transB;
+	transB = mtTranspose( b );
+	Matrix3x3 ans;
+	//	why this doesn't run when 0ing in the for{}
+	//	(original author's open question: the w lanes are cleared here,
+	//	before the loop, because clearing them inside the loop did not work)
+	a.m_row[0].w = 0.f;
+	a.m_row[1].w = 0.f;
+	a.m_row[2].w = 0.f;
+	for(int i=0; i<3; i++)
+	{
+//	a.m_row[i].w = 0.f;
+		ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);
+		ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);
+		ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);
+		ans.m_row[i].w = 0.f;
+	}
+	return ans;
+}
+
+// Matrix * column vector: each output lane is a row of a dotted with b.xyz.
+__inline
+float4 mtMul1(Matrix3x3 a, float4 b)
+{
+	float4 ans;
+	ans.x = dot3F4( a.m_row[0], b );
+	ans.y = dot3F4( a.m_row[1], b );
+	ans.z = dot3F4( a.m_row[2], b );
+	ans.w = 0.f;
+	return ans;
+}
+
+// Row vector * matrix: each output lane is a.xyz dotted with a column of b.
+__inline
+float4 mtMul3(float4 a, Matrix3x3 b)
+{
+	float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
+	float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
+	float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
+
+	float4 ans;
+	ans.x = dot3F4( a, colx );
+	ans.y = dot3F4( a, coly );
+	ans.z = dot3F4( a, colz );
+	// w was previously left unwritten, so callers received an
+	// indeterminate lane; zero it like mtMul1 does.
+	ans.w = 0.f;
+	return ans;
+}
+
+///////////////////////////////////////
+//	Quaternion
+///////////////////////////////////////
+
+// Quaternion stored as float4: xyz is the vector part, w the scalar part.
+typedef float4 Quaternion;
+
+__inline
+Quaternion qtMul(Quaternion a, Quaternion b);
+
+__inline
+Quaternion qtNormalize(Quaternion in);
+
+__inline
+float4 qtRotate(Quaternion q, float4 vec);
+
+__inline
+Quaternion qtInvert(Quaternion q);
+
+__inline
+Matrix3x3 qtGetRotationMatrix(Quaternion q);
+
+
+
+// Quaternion product a*b:
+// vector part = a.xyz x b.xyz + a.w*b.xyz + b.w*a.xyz,
+// scalar part = a.w*b.w - a.xyz . b.xyz.
+__inline
+Quaternion qtMul(Quaternion a, Quaternion b)
+{
+	Quaternion ans;
+	ans = cross3( a, b );
+	ans += a.w*b+b.w*a;
+//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
+	// the w produced by the two lines above is discarded here
+	ans.w = a.w*b.w - dot3F4(a, b);
+	return ans;
+}
+
+// Normalizes all four lanes via fast_normalize.
+__inline
+Quaternion qtNormalize(Quaternion in)
+{
+	return fastNormalize4(in);
+//	in /= length( in );
+//	return in;
+}
+// Rotates vec by q: computes q * (vec.xyz, 0) * qtInvert(q).
+__inline
+float4 qtRotate(Quaternion q, float4 vec)
+{
+	Quaternion qInv = qtInvert( q );
+	float4 vcpy = vec;
+	vcpy.w = 0.f;
+	float4 out = qtMul(qtMul(q,vcpy),qInv);
+	return out;
+}
+
+// Returns the conjugate (-xyz, w); this is the inverse only when q has
+// unit length.
+__inline
+Quaternion qtInvert(Quaternion q)
+{
+	return (Quaternion)(-q.xyz, q.w);
+}
+
+// Rotates vec by the conjugate of q.
+__inline
+float4 qtInvRotate(const Quaternion q, float4 vec)
+{
+	return qtRotate( qtInvert( q ), vec );
+}
+
+// Converts quat to a 3x3 rotation matrix (w of each row is 0).
+// NOTE(review): the formula presumably expects a unit quaternion - confirm
+// that callers normalize first.
+__inline
+Matrix3x3 qtGetRotationMatrix(Quaternion quat)
+{
+	float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
+	Matrix3x3 out;
+
+	out.m_row[0].x=1-2*quat2.y-2*quat2.z;
+	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;
+	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;
+	out.m_row[0].w = 0.f;
+
+	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;
+	out.m_row[1].y=1-2*quat2.x-2*quat2.z;
+	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;
+	out.m_row[1].w = 0.f;
+
+	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;
+	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;
+	out.m_row[2].z=1-2*quat2.x-2*quat2.y;
+	out.m_row[2].w = 0.f;
+
+	return out;
+}
+
+
diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Matrix3x3.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Matrix3x3.h
new file mode 100644
index 000000000..d68176835
--- /dev/null
+++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Matrix3x3.h
@@ -0,0 +1,197 @@
+/*
+Copyright (c) 2012 Advanced Micro Devices, Inc.
+ +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + +#ifndef MATRIX3X3_H +#define MATRIX3X3_H + +#include + +/////////////////////////////////////// +// Matrix3x3 +/////////////////////////////////////// +namespace adl +{ + +typedef +_MEM_CLASSALIGN16 struct +{ + _MEM_ALIGNED_ALLOCATOR16; + float4 m_row[3]; +}Matrix3x3; + +__inline +Matrix3x3 mtZero(); + +__inline +Matrix3x3 mtIdentity(); + +__inline +Matrix3x3 mtDiagonal(float a, float b, float c); + +__inline +Matrix3x3 mtTranspose(const Matrix3x3& m); + +__inline +Matrix3x3 mtMul(const Matrix3x3& a, const Matrix3x3& b); + +__inline +float4 mtMul1(const Matrix3x3& a, const float4& b); + +__inline +Matrix3x3 mtMul2(float a, const Matrix3x3& b); + +__inline +float4 mtMul3(const float4& b, const Matrix3x3& a); + +__inline +Matrix3x3 mtInvert(const Matrix3x3& m); + +__inline +Matrix3x3 mtZero() +{ + Matrix3x3 m; + m.m_row[0] = make_float4(0.f); + m.m_row[1] = make_float4(0.f); + m.m_row[2] = make_float4(0.f); + return m; +} + +__inline +Matrix3x3 mtIdentity() +{ + Matrix3x3 m; + m.m_row[0] = make_float4(1,0,0); + m.m_row[1] = make_float4(0,1,0); + m.m_row[2] = make_float4(0,0,1); + return m; +} + +__inline +Matrix3x3 
mtDiagonal(float a, float b, float c) +{ + Matrix3x3 m; + m.m_row[0] = make_float4(a,0,0); + m.m_row[1] = make_float4(0,b,0); + m.m_row[2] = make_float4(0,0,c); + return m; +} + +__inline +Matrix3x3 mtTranspose(const Matrix3x3& m) +{ + Matrix3x3 out; + out.m_row[0] = make_float4(m.m_row[0].s[0], m.m_row[1].s[0], m.m_row[2].s[0], 0.f); + out.m_row[1] = make_float4(m.m_row[0].s[1], m.m_row[1].s[1], m.m_row[2].s[1], 0.f); + out.m_row[2] = make_float4(m.m_row[0].s[2], m.m_row[1].s[2], m.m_row[2].s[2], 0.f); + return out; +} + +__inline +Matrix3x3 mtMul(const Matrix3x3& a, const Matrix3x3& b) +{ + Matrix3x3 transB; + transB = mtTranspose( b ); + Matrix3x3 ans; + for(int i=0; i<3; i++) + { + ans.m_row[i].s[0] = dot3F4(a.m_row[i],transB.m_row[0]); + ans.m_row[i].s[1] = dot3F4(a.m_row[i],transB.m_row[1]); + ans.m_row[i].s[2] = dot3F4(a.m_row[i],transB.m_row[2]); + } + return ans; +} + +__inline +float4 mtMul1(const Matrix3x3& a, const float4& b) +{ + float4 ans; + ans.s[0] = dot3F4( a.m_row[0], b ); + ans.s[1] = dot3F4( a.m_row[1], b ); + ans.s[2] = dot3F4( a.m_row[2], b ); + return ans; +} + +__inline +Matrix3x3 mtMul2(float a, const Matrix3x3& b) +{ + Matrix3x3 ans; + ans.m_row[0] = a*b.m_row[0]; + ans.m_row[1] = a*b.m_row[1]; + ans.m_row[2] = a*b.m_row[2]; + return ans; +} + +__inline +float4 mtMul3(const float4& a, const Matrix3x3& b) +{ + float4 ans; + ans.x = a.x*b.m_row[0].x + a.y*b.m_row[1].x + a.z*b.m_row[2].x; + ans.y = a.x*b.m_row[0].y + a.y*b.m_row[1].y + a.z*b.m_row[2].y; + ans.z = a.x*b.m_row[0].z + a.y*b.m_row[1].z + a.z*b.m_row[2].z; + return ans; +} + +__inline +Matrix3x3 mtInvert(const Matrix3x3& m) +{ + float det = m.m_row[0].s[0]*m.m_row[1].s[1]*m.m_row[2].s[2]+m.m_row[1].s[0]*m.m_row[2].s[1]*m.m_row[0].s[2]+m.m_row[2].s[0]*m.m_row[0].s[1]*m.m_row[1].s[2] + -m.m_row[0].s[0]*m.m_row[2].s[1]*m.m_row[1].s[2]-m.m_row[2].s[0]*m.m_row[1].s[1]*m.m_row[0].s[2]-m.m_row[1].s[0]*m.m_row[0].s[1]*m.m_row[2].s[2]; + + ADLASSERT( det ); + + Matrix3x3 ans; + 
ans.m_row[0].s[0] = m.m_row[1].s[1]*m.m_row[2].s[2] - m.m_row[1].s[2]*m.m_row[2].s[1]; + ans.m_row[0].s[1] = m.m_row[0].s[2]*m.m_row[2].s[1] - m.m_row[0].s[1]*m.m_row[2].s[2]; + ans.m_row[0].s[2] = m.m_row[0].s[1]*m.m_row[1].s[2] - m.m_row[0].s[2]*m.m_row[1].s[1]; + ans.m_row[0].w = 0.f; + + ans.m_row[1].s[0] = m.m_row[1].s[2]*m.m_row[2].s[0] - m.m_row[1].s[0]*m.m_row[2].s[2]; + ans.m_row[1].s[1] = m.m_row[0].s[0]*m.m_row[2].s[2] - m.m_row[0].s[2]*m.m_row[2].s[0]; + ans.m_row[1].s[2] = m.m_row[0].s[2]*m.m_row[1].s[0] - m.m_row[0].s[0]*m.m_row[1].s[2]; + ans.m_row[1].w = 0.f; + + ans.m_row[2].s[0] = m.m_row[1].s[0]*m.m_row[2].s[1] - m.m_row[1].s[1]*m.m_row[2].s[0]; + ans.m_row[2].s[1] = m.m_row[0].s[1]*m.m_row[2].s[0] - m.m_row[0].s[0]*m.m_row[2].s[1]; + ans.m_row[2].s[2] = m.m_row[0].s[0]*m.m_row[1].s[1] - m.m_row[0].s[1]*m.m_row[1].s[0]; + ans.m_row[2].w = 0.f; + + ans = mtMul2((1.0f/det), ans); + return ans; +} + +__inline +Matrix3x3 mtSet( const float4& a, const float4& b, const float4& c ) +{ + Matrix3x3 m; + m.m_row[0] = a; + m.m_row[1] = b; + m.m_row[2] = c; + return m; +} + +__inline +Matrix3x3 operator+(const Matrix3x3& a, const Matrix3x3& b) +{ + Matrix3x3 out; + out.m_row[0] = a.m_row[0] + b.m_row[0]; + out.m_row[1] = a.m_row[1] + b.m_row[1]; + out.m_row[2] = a.m_row[2] + b.m_row[2]; + return out; +} + +}; + +#endif + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Quaternion.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Quaternion.h new file mode 100644 index 000000000..3eeef4431 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Math/Quaternion.h @@ -0,0 +1,159 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#ifndef QUATERNION_H +#define QUATERNION_H + +#include + +namespace adl +{ + +typedef float4 Quaternion; + +__inline +Quaternion qtSet(const float4& axis, float angle); + +__inline +Quaternion qtMul(const Quaternion& a, const Quaternion& b); + +__inline +float4 qtRotate(const Quaternion& q, const float4& vec); + +__inline +float4 qtInvRotate(const Quaternion& q, const float4& vec); + +__inline +Quaternion qtInvert(const Quaternion& q); + +__inline +Matrix3x3 qtGetRotationMatrix(const Quaternion& quat); + +__inline +Quaternion qtNormalize(const Quaternion& q); + +__inline +Quaternion qtGetIdentity() { return make_float4(0,0,0,1); } + +__inline +Quaternion qtSet(const float4& axis, float angle) +{ + float4 nAxis = normalize3( axis ); + + Quaternion q; + q.s[0] = nAxis.s[0]*sin(angle/2); + q.s[1] = nAxis.s[1]*sin(angle/2); + q.s[2] = nAxis.s[2]*sin(angle/2); + q.s[3] = cos(angle/2); + return q; +} + +__inline +Quaternion qtMul(const Quaternion& a, const Quaternion& b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.s[3]*b + b.s[3]*a; + ans.s[3] = a.s[3]*b.s[3] - (a.s[0]*b.s[0]+a.s[1]*b.s[1]+a.s[2]*b.s[2]); + return ans; +} + +__inline +float4 qtRotate(const Quaternion& q, const float4& vec) +{ + Quaternion vecQ = vec; + vecQ.s[3] = 0.f; + Quaternion qInv = qtInvert( q ); + float4 out = 
qtMul(qtMul(q,vecQ),qInv); + return out; +} + +__inline +float4 qtInvRotate(const Quaternion& q, const float4& vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + +__inline +Quaternion qtInvert(const Quaternion& q) +{ + Quaternion ans; + ans.s[0] = -q.s[0]; + ans.s[1] = -q.s[1]; + ans.s[2] = -q.s[2]; + ans.s[3] = q.s[3]; + return ans; +} + +__inline +Matrix3x3 qtGetRotationMatrix(const Quaternion& quat) +{ + float4 quat2 = make_float4(quat.s[0]*quat.s[0], quat.s[1]*quat.s[1], quat.s[2]*quat.s[2], 0.f); + Matrix3x3 out; + + out.m_row[0].s[0]=1-2*quat2.s[1]-2*quat2.s[2]; + out.m_row[0].s[1]=2*quat.s[0]*quat.s[1]-2*quat.s[3]*quat.s[2]; + out.m_row[0].s[2]=2*quat.s[0]*quat.s[2]+2*quat.s[3]*quat.s[1]; + out.m_row[0].s[3] = 0.f; + + out.m_row[1].s[0]=2*quat.s[0]*quat.s[1]+2*quat.s[3]*quat.s[2]; + out.m_row[1].s[1]=1-2*quat2.s[0]-2*quat2.s[2]; + out.m_row[1].s[2]=2*quat.s[1]*quat.s[2]-2*quat.s[3]*quat.s[0]; + out.m_row[1].s[3] = 0.f; + + out.m_row[2].s[0]=2*quat.s[0]*quat.s[2]-2*quat.s[3]*quat.s[1]; + out.m_row[2].s[1]=2*quat.s[1]*quat.s[2]+2*quat.s[3]*quat.s[0]; + out.m_row[2].s[2]=1-2*quat2.s[0]-2*quat2.s[1]; + out.m_row[2].s[3] = 0.f; + + return out; +} + +__inline +Quaternion qtGetQuaternion(const Matrix3x3* m) +{ + Quaternion q; + q.w = sqrtf( m[0].m_row[0].x + m[0].m_row[1].y + m[0].m_row[2].z + 1 ) * 0.5f; + float inv4w = 1.f/(4.f*q.w); + q.x = (m[0].m_row[2].y-m[0].m_row[1].z)*inv4w; + q.y = (m[0].m_row[0].z-m[0].m_row[2].x)*inv4w; + q.z = (m[0].m_row[1].x-m[0].m_row[0].y)*inv4w; + + return q; +} + +__inline +Quaternion qtNormalize(const Quaternion& q) +{ + return normalize4(q); +} + +__inline +float4 transform(const float4& p, const float4& translation, const Quaternion& orientation) +{ + return qtRotate( orientation, p ) + translation; +} + +__inline +float4 invTransform(const float4& p, const float4& translation, const Quaternion& orientation) +{ + return qtRotate( qtInvert( orientation ), p-translation ); // use qtInvRotate +} + +}; + +#endif + diff --git 
a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Scan/PrefixScan.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Scan/PrefixScan.h new file mode 100644 index 000000000..db7566ede --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Scan/PrefixScan.h @@ -0,0 +1,73 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + +#pragma once + +#include +#include + +namespace adl +{ + +class PrefixScanBase +{ + public: + enum Option + { + INCLUSIVE, + EXCLUSIVE + }; +}; + + +template +class PrefixScan : public PrefixScanBase +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + enum + { + BLOCK_SIZE = 128 + }; + + struct Data + { + Option m_option; + const Device* m_device; + Kernel* m_localScanKernel; + Kernel* m_blockSumKernel; + Kernel* m_propagationKernel; + Buffer* m_workBuffer; + Buffer* m_constBuffer[3];// todo. 
dx need one for each + int m_maxSize; + }; + + static + Data* allocate(const Device* deviceData, int maxSize, Option option = EXCLUSIVE); + + static + void deallocate(Data* data); + + static + void execute(Data* data, Buffer& src, Buffer& dst, int n, u32* sum = 0); +}; + + + +#include +#include + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Scan/PrefixScan.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Scan/PrefixScan.inl new file mode 100644 index 000000000..65e8c06a5 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Scan/PrefixScan.inl @@ -0,0 +1,125 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#define PATH "..\\..\\opencl\\primitives\\AdlPrimitives\\Scan\\PrefixScanKernels" +#define KERNEL0 "LocalScanKernel" +#define KERNEL1 "TopLevelScanKernel" +#define KERNEL2 "AddOffsetKernel" + +#include +#include + +template +typename PrefixScan::Data* PrefixScan::allocate(const Device* device, int maxSize, Option option) +{ + ADLASSERT( TYPE == device->m_type ); + + ADLASSERT( maxSize <= BLOCK_SIZE*2*2048 ); + + const char* src[] = +#if defined(ADL_LOAD_KERNEL_FROM_STRING) + {prefixScanKernelsCL, prefixScanKernelsDX11}; +#else + {0,0}; +#endif + Data* data = new Data; + data->m_device = device; + data->m_localScanKernel = device->getKernel( PATH, KERNEL0, 0, src[TYPE] ); + data->m_blockSumKernel = device->getKernel( PATH, KERNEL1, 0, src[TYPE] ); + data->m_propagationKernel = device->getKernel( PATH, KERNEL2, 0, src[TYPE] ); + + int bufSize = (NEXTMULTIPLEOF( max2( maxSize/BLOCK_SIZE, (int)BLOCK_SIZE ), BLOCK_SIZE )+1); + data->m_workBuffer = new Buffer( device, bufSize ); + data->m_constBuffer[0] = new Buffer( device, 1, BufferBase::BUFFER_CONST ); + data->m_constBuffer[1] = new Buffer( device, 1, BufferBase::BUFFER_CONST ); + data->m_constBuffer[2] = new Buffer( device, 1, BufferBase::BUFFER_CONST ); + + data->m_maxSize = maxSize; + data->m_option = option; + + return data; +} + +template +void PrefixScan::deallocate(Data* data) +{ + delete data->m_workBuffer; + delete data->m_constBuffer[0]; + delete data->m_constBuffer[1]; + delete data->m_constBuffer[2]; + delete data; +} + +template +void PrefixScan::execute(Data* data, Buffer& src, Buffer& dst, int n, u32* sum) +{ + ADLASSERT( data ); + ADLASSERT( n <= data->m_maxSize ); + ADLASSERT( data->m_option == EXCLUSIVE ); + const u32 numBlocks = u32( (n+BLOCK_SIZE*2-1)/(BLOCK_SIZE*2) ); + + + int4 constBuffer; + constBuffer.x = n; + constBuffer.y = numBlocks; + constBuffer.z = (int)nextPowerOf2( numBlocks ); + + Buffer* srcNative = BufferUtils::map( data->m_device, 
&src ); + Buffer* dstNative = BufferUtils::map( data->m_device, &dst ); + + { + BufferInfo bInfo[] = { BufferInfo( dstNative ), BufferInfo( srcNative ), BufferInfo( data->m_workBuffer ) }; + + Launcher launcher( data->m_device, data->m_localScanKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[0], constBuffer ); + launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE ); + } + + { + BufferInfo bInfo[] = { BufferInfo( data->m_workBuffer ) }; + + Launcher launcher( data->m_device, data->m_blockSumKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[1], constBuffer ); + launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE ); + } + + + if( numBlocks > 1 ) + { + BufferInfo bInfo[] = { BufferInfo( dstNative ), BufferInfo( data->m_workBuffer ) }; + Launcher launcher( data->m_device, data->m_propagationKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[2], constBuffer ); + launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE ); + } + + DeviceUtils::waitForCompletion( data->m_device ); + if( sum ) + { + dstNative->read( sum, 1, n-1); + } + DeviceUtils::waitForCompletion( data->m_device ); + + BufferUtils::unmap( srcNative, &src ); + BufferUtils::unmap( dstNative, &dst ); +} + +#undef PATH +#undef KERNEL0 +#undef KERNEL1 +#undef KERNEL2 \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Scan/PrefixScanHost.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Scan/PrefixScanHost.inl new file mode 100644 index 000000000..44987f548 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Scan/PrefixScanHost.inl @@ -0,0 +1,74 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. 
+In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +template<> +class PrefixScan : public PrefixScanBase +{ + public: + struct Data + { + Option m_option; + }; + + static + Data* allocate(const Device* deviceData, int maxSize, Option option = EXCLUSIVE) + { + ADLASSERT( deviceData->m_type == TYPE_HOST ); + + Data* data = new Data; + data->m_option = option; + return data; + } + + static + void deallocate(Data* data) + { + delete data; + } + + static + void execute(Data* data, Buffer& src, Buffer& dst, int n, u32* sum = 0) + { + ADLASSERT( src.getType() == TYPE_HOST && dst.getType() == TYPE_HOST ); + HostBuffer& hSrc = (HostBuffer&)src; + HostBuffer& hDst = (HostBuffer&)dst; + + u32 s = 0; + if( data->m_option == EXCLUSIVE ) + { + for(int i=0; i>1; nActive>0; nActive>>=1, offset<<=1) + { + GROUP_LDS_BARRIER; + for(int iIdx=lIdx; iIdx>= 1; + for(int nActive=1; nActive>=1 ) + { + GROUP_LDS_BARRIER; + for( int iIdx = lIdx; iIdx dst : register( u0 ); +RWStructuredBuffer src : register( u1 ); +RWStructuredBuffer sumBuffer : register( u2 ); + + +groupshared u32 ldsData[2048]; + +u32 ScanExclusive(u32 n, int lIdx, int lSize) +{ + u32 blocksum; + int offset = 1; + for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1) + { + GROUP_LDS_BARRIER; + 
for(int iIdx=lIdx; iIdx>= 1; + for(int nActive=1; nActive>=1 ) + { + GROUP_LDS_BARRIER; + for( int iIdx = lIdx; iIdx blockSum2 : register( u1 ); + +[numthreads(WG_SIZE, 1, 1)] +void AddOffsetKernel(uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID) +{ + const u32 blockSize = WG_SIZE*2; + + int myIdx = GET_GROUP_IDX+1; + int llIdx = GET_LOCAL_IDX; + + u32 iBlockSum = blockSum2[myIdx]; + + int endValue = min((myIdx+1)*(blockSize), m_numElems); + for(int i=myIdx*blockSize+llIdx; i>1; nActive>0; nActive>>=1, offset<<=1)\n" +" {\n" +" GROUP_LDS_BARRIER;\n" +" for(int iIdx=lIdx; iIdx>= 1;\n" +" for(int nActive=1; nActive>=1 )\n" +" {\n" +" GROUP_LDS_BARRIER;\n" +" for( int iIdx = lIdx; iIdx dst : register( u0 );\n" +"RWStructuredBuffer src : register( u1 );\n" +"RWStructuredBuffer sumBuffer : register( u2 );\n" +"\n" +"\n" +"groupshared u32 ldsData[2048];\n" +"\n" +"u32 ScanExclusive(u32 n, int lIdx, int lSize)\n" +"{\n" +" u32 blocksum;\n" +" int offset = 1;\n" +" for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1)\n" +" {\n" +" GROUP_LDS_BARRIER;\n" +" for(int iIdx=lIdx; iIdx>= 1;\n" +" for(int nActive=1; nActive>=1 )\n" +" {\n" +" GROUP_LDS_BARRIER;\n" +" for( int iIdx = lIdx; iIdx blockSum2 : register( u1 );\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void AddOffsetKernel(uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID)\n" +"{\n" +" const u32 blockSize = WG_SIZE*2;\n" +"\n" +" int myIdx = GET_GROUP_IDX+1;\n" +" int llIdx = GET_LOCAL_IDX;\n" +"\n" +" u32 iBlockSum = blockSum2[myIdx];\n" +"\n" +" int endValue = min((myIdx+1)*(blockSize), m_numElems);\n" +" for(int i=myIdx*blockSize+llIdx; i +#include +#include +#include + +namespace adl +{ + +class BoundSearchBase +{ + public: + enum Option + { + BOUND_LOWER, + BOUND_UPPER, + COUNT, + }; +}; + +template +class BoundSearch : public BoundSearchBase +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + 
struct Data + { + const Device* m_device; + Kernel* m_lowerSortDataKernel; + Kernel* m_upperSortDataKernel; + Kernel* m_subtractKernel; + Buffer* m_constBuffer; + Buffer* m_lower; + Buffer* m_upper; + typename Fill::Data* m_fillData; + }; + + static + Data* allocate(const Device* deviceData, int maxSize = 0); + + static + void deallocate(Data* data); + + // src has to be src[i].m_key <= src[i+1].m_key + static + void execute(Data* data, Buffer& src, u32 nSrc, Buffer& dst, u32 nDst, Option option = BOUND_LOWER ); + +// static +// void execute(Data* data, Buffer& src, Buffer& dst, int n, Option option = ); +}; + +#include +#include + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Search/BoundSearch.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Search/BoundSearch.inl new file mode 100644 index 000000000..33138b4e5 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Search/BoundSearch.inl @@ -0,0 +1,128 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + +#define PATH "..\\..\\opencl\\primitives\\AdlPrimitives\\Search\\BoundSearchKernels" +#define KERNEL0 "SearchSortDataLowerKernel" +#define KERNEL1 "SearchSortDataUpperKernel" +#define KERNEL2 "SubtractKernel" + +#include +#include + +template +typename BoundSearch::Data* BoundSearch::allocate(const Device* device, int maxSize) +{ + ADLASSERT( TYPE == device->m_type ); + + const char* src[] = +#if defined(ADL_LOAD_KERNEL_FROM_STRING) + {boundSearchKernelsCL, boundSearchKernelsDX11}; +#else + {0,0}; +#endif + + Data* data = new Data; + + data->m_device = device; + data->m_lowerSortDataKernel = device->getKernel( PATH, KERNEL0, 0, src[TYPE] ); + data->m_upperSortDataKernel = device->getKernel( PATH, KERNEL1, 0, src[TYPE] ); + data->m_constBuffer = new Buffer( device, 1, BufferBase::BUFFER_CONST ); + if( maxSize ) + { + data->m_subtractKernel = device->getKernel( PATH, KERNEL2, 0, src[TYPE] ); + } + data->m_lower = (maxSize == 0)? 0: new Buffer( device, maxSize ); + data->m_upper = (maxSize == 0)? 0: new Buffer( device, maxSize ); + data->m_fillData = (maxSize == 0)? 
0: Fill::allocate( device ); + + return data; +} + +template +void BoundSearch::deallocate(Data* data) +{ + delete data->m_constBuffer; + if( data->m_lower ) delete data->m_lower; + if( data->m_upper ) delete data->m_upper; + if( data->m_fillData ) Fill::deallocate( data->m_fillData ); + delete data; +} + +template +void BoundSearch::execute(Data* data, Buffer& src, u32 nSrc, Buffer& dst, u32 nDst, Option option ) +{ + int4 constBuffer; + constBuffer.x = nSrc; + constBuffer.y = nDst; + + Buffer* srcNative = BufferUtils::map( data->m_device, &src ); + Buffer* dstNative = BufferUtils::map( data->m_device, &dst ); + + if( option == BOUND_LOWER ) + { + BufferInfo bInfo[] = { BufferInfo( srcNative, true ), BufferInfo( dstNative ) }; + + Launcher launcher( data->m_device, data->m_lowerSortDataKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( nSrc, 64 ); + } + else if( option == BOUND_UPPER ) + { + BufferInfo bInfo[] = { BufferInfo( srcNative, true ), BufferInfo( dstNative ) }; + + Launcher launcher( data->m_device, data->m_upperSortDataKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( nSrc+1, 64 ); + } + else if( option == COUNT ) + { + ADLASSERT( data->m_lower ); + ADLASSERT( data->m_upper ); + ADLASSERT( data->m_lower->getSize() <= (int)nDst ); + ADLASSERT( data->m_upper->getSize() <= (int)nDst ); + + int zero = 0; + Fill::execute( data->m_fillData, (Buffer&)*data->m_lower, zero, nDst ); + Fill::execute( data->m_fillData, (Buffer&)*data->m_upper, zero, nDst ); + + execute( data, src, nSrc, *data->m_lower, nDst, BOUND_LOWER ); + execute( data, src, nSrc, *data->m_upper, nDst, BOUND_UPPER ); + + { + BufferInfo bInfo[] = { BufferInfo( data->m_upper, true ), BufferInfo( data->m_lower, true ), BufferInfo( dstNative ) }; + + Launcher launcher( 
data->m_device, data->m_subtractKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer, constBuffer ); + launcher.launch1D( nDst, 64 ); + } + } + else + { + ADLASSERT( 0 ); + } + + BufferUtils::unmap( srcNative, &src ); + BufferUtils::unmap( dstNative, &dst ); +} + + +#undef PATH +#undef KERNEL0 +#undef KERNEL1 +#undef KERNEL2 + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Search/BoundSearchHost.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Search/BoundSearchHost.inl new file mode 100644 index 000000000..b53b3ba48 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Search/BoundSearchHost.inl @@ -0,0 +1,111 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +template<> +class BoundSearch : public BoundSearchBase +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + struct Data + { + const Device* m_device; + }; + + static + Data* allocate(const Device* deviceData, int maxSize = 0) + { + ADLASSERT( deviceData->m_type == TYPE_HOST ); + Data* data = new Data; + data->m_device = deviceData; + return data; + } + + static + void deallocate(Data* data) + { + delete data; + } + + static + void execute(Data* data, Buffer& rawSrc, u32 nSrc, Buffer& rawDst, u32 nDst, Option option = BOUND_LOWER) + { + ADLASSERT( rawSrc.getType() == TYPE_HOST ); + ADLASSERT( rawDst.getType() == TYPE_HOST ); + + HostBuffer& src = *(HostBuffer*)&rawSrc; + HostBuffer& dst = *(HostBuffer*)&rawDst; + + for(int i=0; i lower( data->m_device, nDst ); + HostBuffer upper( data->m_device, nDst ); + + for(u32 i=0; i& src, Buffer& dst, int n, Option option = ); +}; + + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Search/BoundSearchKernels.cl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Search/BoundSearchKernels.cl new file mode 100644 index 000000000..105a17a43 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Search/BoundSearchKernels.cl @@ -0,0 +1,112 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +typedef unsigned int u32; +#define GET_GROUP_IDX get_group_id(0) +#define GET_LOCAL_IDX get_local_id(0) +#define GET_GLOBAL_IDX get_global_id(0) +#define GET_GROUP_SIZE get_local_size(0) +#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) + +typedef struct +{ + u32 m_key; + u32 m_value; +}SortData; + + + +typedef struct +{ + u32 m_nSrc; + u32 m_nDst; + u32 m_padding[2]; +} ConstBuffer; + + + +__attribute__((reqd_work_group_size(64,1,1))) +__kernel +void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, + ConstBuffer cb) +{ + int gIdx = GET_GLOBAL_IDX; + u32 nSrc = cb.m_nSrc; + u32 nDst = cb.m_nDst; + + if( gIdx < nSrc ) + { + SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1); + SortData end; end.m_key = nDst; end.m_value = nDst; + + SortData iData = (gIdx==0)? first: src[gIdx-1]; + SortData jData = (gIdx==nSrc)? end: src[gIdx]; + + if( iData.m_key != jData.m_key ) + { +// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++) + u32 k = jData.m_key; + { + dst[k] = gIdx; + } + } + } +} + + +__attribute__((reqd_work_group_size(64,1,1))) +__kernel +void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, + ConstBuffer cb) +{ + int gIdx = GET_GLOBAL_IDX; + u32 nSrc = cb.m_nSrc; + u32 nDst = cb.m_nDst; + + if( gIdx < nSrc+1 ) + { + SortData first; first.m_key = 0; first.m_value = 0; + SortData end; end.m_key = nDst; end.m_value = nDst; + + SortData iData = (gIdx==0)? first: src[gIdx-1]; + SortData jData = (gIdx==nSrc)? 
end: src[gIdx]; + + if( iData.m_key != jData.m_key ) + { +// for(u32 k=iData.m_key; k src : register( t0 ); +RWStructuredBuffer dst : register( u0 ); + + +[numthreads(64, 1, 1)] +void SearchSortDataLowerKernel( DEFAULT_ARGS ) +{ + int gIdx = GET_GLOBAL_IDX; + u32 nSrc = m_nSrc; + u32 nDst = m_nDst; + + if( gIdx < nSrc ) + { + SortData iData; + SortData jData; + if( gIdx==0 ) iData.m_key = iData.m_value = (u32)-1; + else iData = src[gIdx-1]; + + if( gIdx==nSrc ) jData.m_key = jData.m_value = nDst; + else jData = src[gIdx]; + + if( iData.m_key != jData.m_key ) + { +// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++) + u32 k = jData.m_key; + { + dst[k] = gIdx; + } + } + } +} + +[numthreads(64, 1, 1)] +void SearchSortDataUpperKernel( DEFAULT_ARGS ) +{ + int gIdx = GET_GLOBAL_IDX; + u32 nSrc = m_nSrc; + u32 nDst = m_nDst; + + if( gIdx < nSrc+1 ) + { + SortData iData; + SortData jData; + if( gIdx==0 ) iData.m_key = iData.m_value = 0; + else iData = src[gIdx-1]; + + if( gIdx==nSrc ) jData.m_key = jData.m_value = nDst; + else jData = src[gIdx]; + + if( iData.m_key != jData.m_key ) + { +// for(u32 k=iData.m_key; k src : register( t0 );\n" +"RWStructuredBuffer dst : register( u0 );\n" +"\n" +"\n" +"[numthreads(64, 1, 1)]\n" +"void SearchSortDataLowerKernel( DEFAULT_ARGS )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +" u32 nSrc = m_nSrc;\n" +" u32 nDst = m_nDst;\n" +"\n" +" if( gIdx < nSrc )\n" +" {\n" +" SortData iData;\n" +" SortData jData;\n" +" if( gIdx==0 ) iData.m_key = iData.m_value = (u32)-1;\n" +" else iData = src[gIdx-1];\n" +"\n" +" if( gIdx==nSrc ) jData.m_key = jData.m_value = nDst;\n" +" else jData = src[gIdx];\n" +"\n" +" if( iData.m_key != jData.m_key )\n" +" {\n" +"// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++)\n" +" u32 k = jData.m_key;\n" +" {\n" +" dst[k] = gIdx;\n" +" }\n" +" }\n" +" }\n" +"}\n" +"\n" +"[numthreads(64, 1, 1)]\n" +"void SearchSortDataUpperKernel( DEFAULT_ARGS )\n" +"{\n" +" int gIdx = GET_GLOBAL_IDX;\n" +" u32 nSrc 
= m_nSrc;\n" +" u32 nDst = m_nDst;\n" +"\n" +" if( gIdx < nSrc+1 )\n" +" {\n" +" SortData iData;\n" +" SortData jData;\n" +" if( gIdx==0 ) iData.m_key = iData.m_value = 0;\n" +" else iData = src[gIdx-1];\n" +"\n" +" if( gIdx==nSrc ) jData.m_key = jData.m_value = nDst;\n" +" else jData = src[gIdx];\n" +"\n" +" if( iData.m_key != jData.m_key )\n" +" {\n" +"// for(u32 k=iData.m_key; k +#include +#include +#include + +namespace adl +{ + +class RadixSortBase +{ + public: + enum Option + { + SORT_SIMPLE, + SORT_STANDARD, + SORT_ADVANCED + }; +}; + +template +class RadixSort : public RadixSortBase +{ + public: + struct Data + { + Option m_option; + const Device* m_deviceData; + typename PrefixScan::Data* m_scanData; + int m_maxSize; + }; + + + static + Data* allocate(const Device* deviceData, int maxSize, Option option = SORT_STANDARD); + + static + void deallocate(Data* data); + + static + void execute(Data* data, Buffer& inout, int n, int sortBits = 32); +}; + + +#include +#include + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort.inl new file mode 100644 index 000000000..f7da098b6 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort.inl @@ -0,0 +1,58 @@ +/* + 2011 Takahiro Harada +*/ + +#include +#include +#include + + +#define DISPATCH_IMPL(x) \ + switch( data->m_option ) \ + { \ + case SORT_SIMPLE: RadixSortSimple::x; break; \ + case SORT_STANDARD: RadixSortStandard::x; break; \ + case SORT_ADVANCED: RadixSortAdvanced::x; break; \ + default:ADLASSERT(0);break; \ + } + +template +typename RadixSort::Data* RadixSort::allocate(const Device* deviceData, int maxSize, Option option) +{ + ADLASSERT( TYPE == deviceData->m_type ); + + void* dataOut; + switch( option ) + { + case SORT_SIMPLE: + dataOut = RadixSortSimple::allocate( deviceData, maxSize, option ); + break; + case SORT_STANDARD: + dataOut = 
RadixSortStandard::allocate( deviceData, maxSize, option ); + break; + case SORT_ADVANCED: + dataOut = RadixSortAdvanced::allocate( deviceData, maxSize, option ); + break; + default: + ADLASSERT(0); + break; + } + return (typename RadixSort::Data*)dataOut; +} + +template +void RadixSort::deallocate(Data* data) +{ + DISPATCH_IMPL( deallocate( data ) ); +} + +template +void RadixSort::execute(Data* data, Buffer& inout, int n, int sortBits) +{ + DISPATCH_IMPL( execute( data, inout, n, sortBits ) ); +} + + +#undef DISPATCH_IMPL + + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32.h new file mode 100644 index 000000000..c5433e72f --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32.h @@ -0,0 +1,98 @@ +/* + 2011 Takahiro Harada +*/ + +#pragma once + +#include +#include +#include +#include + +namespace adl +{ + +class RadixSort32Base +{ + public: +// enum Option +// { +// SORT_SIMPLE, +// SORT_STANDARD, +// SORT_ADVANCED +// }; +}; + +template +class RadixSort32 : public RadixSort32Base +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + enum + { + DATA_ALIGNMENT = 256, + WG_SIZE = 64, + ELEMENTS_PER_WORK_ITEM = (256/WG_SIZE), + BITS_PER_PASS = 4, + + // if you change this, change nPerWI in kernel as well + NUM_WGS = 20*6, // cypress +// NUM_WGS = 24*6, // cayman +// NUM_WGS = 32*4, // nv + }; + + struct ConstData + { + int m_n; + int m_nWGs; + int m_startBit; + int m_nBlocksPerWG; + }; + + struct Data + { + const Device* m_device; + int m_maxSize; + + Kernel* m_streamCountKernel; + Kernel* m_streamCountSortDataKernel; + Kernel* m_prefixScanKernel; + Kernel* m_sortAndScatterKernel; + Kernel* m_sortAndScatterKeyValueKernel; + Kernel* m_sortAndScatterSortDataKernel; + + Buffer* m_workBuffer0; + Buffer* m_workBuffer1; + Buffer* m_workBuffer2; + Buffer* m_workBuffer3; + + Buffer* 
m_constBuffer[32/BITS_PER_PASS]; + + typename Copy::Data* m_copyData; + }; + + static + Data* allocate(const Device* device, int maxSize); + + static + void deallocate(Data* data); + + static + void execute(Data* data, Buffer& inout, int n, int sortBits = 32); + + static + void execute(Data* data, Buffer& in, Buffer& out, int n, int sortBits = 32); + + static + void execute(Data* data, Buffer& keysIn, Buffer& keysOut, Buffer& valuesIn, Buffer& valuesOut, int n, int sortBits = 32); + + static + void execute(Data* data, Buffer& keyValuesInOut, int n, int sortBits = 32 ); +}; + + +#include +#include + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32.inl new file mode 100644 index 000000000..468943227 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32.inl @@ -0,0 +1,346 @@ +/* + 2011 Takahiro Harada +*/ + +#define PATH "..\\..\\opencl\\primitives\\AdlPrimitives\\Sort\\RadixSort32Kernels" +#define RADIXSORT32_KERNEL0 "StreamCountKernel" +#define RADIXSORT32_KERNEL1 "PrefixScanKernel" +#define RADIXSORT32_KERNEL2 "SortAndScatterKernel" +#define RADIXSORT32_KERNEL3 "SortAndScatterKeyValueKernel" +#define RADIXSORT32_KERNEL4 "SortAndScatterSortDataKernel" +#define RADIXSORT32_KERNEL5 "StreamCountSortDataKernel" + +#include "RadixSort32KernelsCL.h" +#include "RadixSort32KernelsDX11.h" + +// todo. Shader compiler (2010JuneSDK) doesn't allow me to place Barriers in SortAndScatterKernel... +// So it only works on a GPU with 64 wide SIMD. 
+ +template +typename RadixSort32::Data* RadixSort32::allocate( const Device* device, int maxSize ) +{ + ADLASSERT( TYPE == device->m_type ); + + const char* src[] = +#if defined(ADL_LOAD_KERNEL_FROM_STRING) + {radixSort32KernelsCL, radixSort32KernelsDX11}; +#else + {0,0}; +#endif + + Data* data = new Data; + data->m_device = device; + data->m_maxSize = maxSize; + data->m_streamCountKernel = device->getKernel( PATH, RADIXSORT32_KERNEL0, 0, src[TYPE] ); + data->m_streamCountSortDataKernel = device->getKernel( PATH, RADIXSORT32_KERNEL5, 0, src[TYPE] ); + + + + data->m_prefixScanKernel = device->getKernel( PATH, RADIXSORT32_KERNEL1, 0, src[TYPE] ); + data->m_sortAndScatterKernel = device->getKernel( PATH, RADIXSORT32_KERNEL2, 0, src[TYPE] ); + data->m_sortAndScatterKeyValueKernel = device->getKernel( PATH, RADIXSORT32_KERNEL3, 0, src[TYPE] ); + data->m_sortAndScatterSortDataKernel = device->getKernel( PATH, RADIXSORT32_KERNEL4, 0, src[TYPE] ); + + int wtf = NUM_WGS*(1<m_workBuffer0 = new Buffer( device, maxSize ); + data->m_workBuffer1 = new Buffer( device , wtf ); + data->m_workBuffer2 = new Buffer( device, maxSize ); + data->m_workBuffer3 = new Buffer(device,maxSize); + + + for(int i=0; i<32/BITS_PER_PASS; i++) + data->m_constBuffer[i] = new Buffer( device, 1, BufferBase::BUFFER_CONST ); + + data->m_copyData = Copy::allocate( device ); + + return data; +} + +template +void RadixSort32::deallocate( Data* data ) +{ + delete data->m_workBuffer0; + delete data->m_workBuffer1; + delete data->m_workBuffer2; + delete data->m_workBuffer3; + + for(int i=0; i<32/BITS_PER_PASS; i++) + delete data->m_constBuffer[i]; + + Copy::deallocate( data->m_copyData ); + + delete data; +} + +template +void RadixSort32::execute(Data* data, Buffer& inout, int n, int sortBits /* = 32 */ ) +{ + ADLASSERT( n%DATA_ALIGNMENT == 0 ); + ADLASSERT( n <= data->m_maxSize ); +// ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); + ADLASSERT( BITS_PER_PASS == 4 ); + ADLASSERT( WG_SIZE == 64 ); + ADLASSERT( 
(sortBits&0x3) == 0 ); + + Buffer* src = &inout; + Buffer* dst = data->m_workBuffer0; + Buffer* histogramBuffer = data->m_workBuffer1; + + int nWGs = NUM_WGS; + ConstData cdata; + { + int nBlocks = (n+ELEMENTS_PER_WORK_ITEM*WG_SIZE-1)/(ELEMENTS_PER_WORK_ITEM*WG_SIZE); + + cdata.m_n = n; + cdata.m_nWGs = NUM_WGS; + cdata.m_startBit = 0; + cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1)/cdata.m_nWGs; + + if( nBlocks < NUM_WGS ) + { + cdata.m_nBlocksPerWG = 1; + nWGs = nBlocks; + } + } + + for(int ib=0; ibm_device, data->m_streamCountKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( NUM_WGS*WG_SIZE, WG_SIZE ); + } + {// prefix scan group histogram + BufferInfo bInfo[] = { BufferInfo( histogramBuffer ) }; + Launcher launcher( data->m_device, data->m_prefixScanKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( 128, 128 ); + } + {// local sort and distribute + BufferInfo bInfo[] = { BufferInfo( src, true ), BufferInfo( histogramBuffer, true ), BufferInfo( dst ) }; + Launcher launcher( data->m_device, data->m_sortAndScatterKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); + } + swap2( src, dst ); + } + + if( src != &inout ) + { + Copy::execute( data->m_copyData, (Buffer&)inout, (Buffer&)*src, n ); + } +} + +template +void RadixSort32::execute(Data* data, Buffer& in, Buffer& out, int n, int sortBits /* = 32 */ ) +{ + ADLASSERT( n%DATA_ALIGNMENT == 0 ); + ADLASSERT( n <= data->m_maxSize ); +// ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); + ADLASSERT( BITS_PER_PASS == 4 ); + ADLASSERT( WG_SIZE == 64 ); + ADLASSERT( (sortBits&0x3) == 0 ); + + Buffer* src = ∈ + Buffer* dst = data->m_workBuffer0; + Buffer* 
histogramBuffer = data->m_workBuffer1; + + int nWGs = NUM_WGS; + ConstData cdata; + { + int nBlocks = (n+ELEMENTS_PER_WORK_ITEM*WG_SIZE-1)/(ELEMENTS_PER_WORK_ITEM*WG_SIZE); + cdata.m_n = n; + cdata.m_nWGs = NUM_WGS; + cdata.m_startBit = 0; + cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1)/cdata.m_nWGs; + if( nBlocks < NUM_WGS ) + { + cdata.m_nBlocksPerWG = 1; + nWGs = nBlocks; + } + } + + if( sortBits == 4 ) dst = &out; + + for(int ib=0; ibm_device, data->m_streamCountKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( NUM_WGS*WG_SIZE, WG_SIZE ); + } + {// prefix scan group histogram + BufferInfo bInfo[] = { BufferInfo( histogramBuffer ) }; + Launcher launcher( data->m_device, data->m_prefixScanKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( 128, 128 ); + } + {// local sort and distribute + BufferInfo bInfo[] = { BufferInfo( src, true ), BufferInfo( histogramBuffer, true ), BufferInfo( dst ) }; + Launcher launcher( data->m_device, data->m_sortAndScatterKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); + } + swap2( src, dst ); + } +} + +template +void RadixSort32::execute(Data* data, Buffer& keysIn, Buffer& keysOut, Buffer& valuesIn, Buffer& valuesOut, int n, int sortBits /* = 32 */) +{ + ADLASSERT( n%DATA_ALIGNMENT == 0 ); + ADLASSERT( n <= data->m_maxSize ); +// ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); + ADLASSERT( BITS_PER_PASS == 4 ); + ADLASSERT( WG_SIZE == 64 ); + ADLASSERT( (sortBits&0x3) == 0 ); + + Buffer* src = &keysIn; + Buffer* srcVal = &valuesIn; + Buffer* dst = data->m_workBuffer0; + Buffer* dstVal = data->m_workBuffer2; + Buffer* histogramBuffer = data->m_workBuffer1; + + int nWGs = 
NUM_WGS; + ConstData cdata; + { + int nBlocks = (n+ELEMENTS_PER_WORK_ITEM*WG_SIZE-1)/(ELEMENTS_PER_WORK_ITEM*WG_SIZE); + cdata.m_n = n; + cdata.m_nWGs = NUM_WGS; + cdata.m_startBit = 0; + cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1)/cdata.m_nWGs; + if( nBlocks < NUM_WGS ) + { + cdata.m_nBlocksPerWG = 1; + nWGs = nBlocks; + } + } + + if( sortBits == 4 ) + { + dst = &keysOut; + dstVal = &valuesOut; + } + + for(int ib=0; ibm_device, data->m_streamCountKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( NUM_WGS*WG_SIZE, WG_SIZE ); + } + {// prefix scan group histogram + BufferInfo bInfo[] = { BufferInfo( histogramBuffer ) }; + Launcher launcher( data->m_device, data->m_prefixScanKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( 128, 128 ); + } + {// local sort and distribute + BufferInfo bInfo[] = { BufferInfo( src, true ), BufferInfo( srcVal, true ), BufferInfo( histogramBuffer, true ), BufferInfo( dst ), BufferInfo( dstVal ) }; + Launcher launcher( data->m_device, data->m_sortAndScatterKeyValueKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); + } + swap2( src, dst ); + swap2( srcVal, dstVal ); + } +} + +template +void RadixSort32::execute(Data* data, Buffer& keyValuesInOut, int n, int sortBits /* = 32 */) +{ + ADLASSERT( n%DATA_ALIGNMENT == 0 ); + ADLASSERT( n <= data->m_maxSize ); +// ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); + ADLASSERT( BITS_PER_PASS == 4 ); + ADLASSERT( WG_SIZE == 64 ); + ADLASSERT( (sortBits&0x3) == 0 ); + + Buffer* src = &keyValuesInOut; + Buffer* dst = data->m_workBuffer3; + + Buffer* histogramBuffer = data->m_workBuffer1; + + int nWGs = NUM_WGS; + ConstData cdata; + { + int 
nBlocks = (n+ELEMENTS_PER_WORK_ITEM*WG_SIZE-1)/(ELEMENTS_PER_WORK_ITEM*WG_SIZE); + cdata.m_n = n; + cdata.m_nWGs = NUM_WGS; + cdata.m_startBit = 0; + cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1)/cdata.m_nWGs; + if( nBlocks < NUM_WGS ) + { + cdata.m_nBlocksPerWG = 1; + nWGs = nBlocks; + } + } + + int count=0; + for(int ib=0; ibm_device, data->m_streamCountSortDataKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( NUM_WGS*WG_SIZE, WG_SIZE ); + } + {// prefix scan group histogram + BufferInfo bInfo[] = { BufferInfo( histogramBuffer ) }; + Launcher launcher( data->m_device, data->m_prefixScanKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( 128, 128 ); + } + {// local sort and distribute + BufferInfo bInfo[] = { BufferInfo( src, true ), BufferInfo( histogramBuffer, true ), BufferInfo( dst )}; + Launcher launcher( data->m_device, data->m_sortAndScatterSortDataKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[ib/4], cdata ); + launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); + } + swap2( src, dst ); + count++; + } + + if (count&1) + { + ADLASSERT(0);//need to copy from workbuffer to keyValuesInOut + + } +} +#undef PATH +#undef RADIXSORT32_KERNEL0 +#undef RADIXSORT32_KERNEL1 +#undef RADIXSORT32_KERNEL2 +#undef RADIXSORT32_KERNEL3 + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32Host.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32Host.inl new file mode 100644 index 000000000..7e174f48d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32Host.inl @@ -0,0 +1,163 @@ +/* + 2011 Takahiro Harada +*/ + +template<> +class RadixSort32 : public RadixSort32Base 
+{ + public: + typedef Launcher::BufferInfo BufferInfo; + + enum + { + BITS_PER_PASS = 8, + NUM_TABLES = (1<* m_workBuffer; + }; + + static + Data* allocate(const Device* device, int maxSize) + { + ADLASSERT( device->m_type == TYPE_HOST ); + + Data* data = new Data; + data->m_workBuffer = new HostBuffer( device, maxSize ); + return data; + } + + static + void deallocate(Data* data) + { + delete data->m_workBuffer; + delete data; + } + + static + void execute(Data* data, Buffer& inout, int n, int sortBits = 32) + { + ADLASSERT( inout.getType() == TYPE_HOST ); + + int tables[NUM_TABLES]; + int counter[NUM_TABLES]; + + u32* src = inout.m_ptr; + u32* dst = data->m_workBuffer->m_ptr; + + for(int startBit=0; startBit> startBit) & (NUM_TABLES-1); + tables[tableIdx]++; + } + + // prefix scan + int sum = 0; + for(int i=0; i> startBit) & (NUM_TABLES-1); + + dst[tables[tableIdx] + counter[tableIdx]] = src[i]; + counter[tableIdx] ++; + } + + swap2( src, dst ); + } + + { + if( src != inout.m_ptr ) + { + memcpy( dst, src, sizeof(u32)*n ); + } + } + + } + + static + void execute(Data* data, Buffer& keyInout, const Buffer& valueInout, int n, int sortBits = 32) + { + ADLASSERT( keyInout.getType() == TYPE_HOST ); + + int tables[NUM_TABLES]; + int counter[NUM_TABLES]; + + u32* src = keyInout.m_ptr; + u32* dst = data->m_workBuffer->m_ptr; + + HostBuffer bufVal(valueInout.m_device, valueInout.m_size); + bufVal.write(valueInout.m_ptr, valueInout.m_size); + + u32* srcVal = valueInout.m_ptr; + u32* dstVal = bufVal.m_ptr; + + for(int startBit=0; startBit> startBit) & (NUM_TABLES-1); + tables[tableIdx]++; + } + + // prefix scan + int sum = 0; + for(int i=0; i> startBit) & (NUM_TABLES-1); + int newIdx = tables[tableIdx] + counter[tableIdx]; + dst[newIdx] = src[i]; + dstVal[newIdx] = srcVal[i]; + counter[tableIdx]++; + } + + swap2( src, dst ); + swap2( srcVal, dstVal ); + } + + { + if( src != keyInout.m_ptr ) + { + memcpy( dst, src, sizeof(u32)*n ); + } + + if( srcVal != valueInout.m_ptr ) + 
{ + memcpy( dstVal, srcVal, sizeof(u32)*n ); + } + } + + } +}; + + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32Kernels.cl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32Kernels.cl new file mode 100644 index 000000000..44dd9a90f --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSort32Kernels.cl @@ -0,0 +1,1104 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Author Takahiro Harada + + +//#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable + +typedef unsigned int u32; +#define GET_GROUP_IDX get_group_id(0) +#define GET_LOCAL_IDX get_local_id(0) +#define GET_GLOBAL_IDX get_global_id(0) +#define GET_GROUP_SIZE get_local_size(0) +#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) +#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) +#define AtomInc(x) atom_inc(&(x)) +#define AtomInc1(x, out) out = atom_inc(&(x)) +#define AtomAdd(x, value) atom_add(&(x), value) + +#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) + + +#define make_uint4 (uint4) +#define make_uint2 (uint2) +#define make_int2 (int2) + +#define WG_SIZE 64 +#define ELEMENTS_PER_WORK_ITEM (256/WG_SIZE) +#define BITS_PER_PASS 4 +#define NUM_BUCKET (1< 64 ) + { + sorterSharedMemory[idx] += sorterSharedMemory[idx-64]; + GROUP_MEM_FENCE; + } + + sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2]; + GROUP_MEM_FENCE; + } +#else + if( lIdx < 64 ) + { + sorterSharedMemory[idx] += sorterSharedMemory[idx-1]; + GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-2]; + GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-4]; + GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-8]; + GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-16]; + GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-32]; + GROUP_MEM_FENCE; + if( wgSize > 64 ) + { + sorterSharedMemory[idx] += sorterSharedMemory[idx-64]; + GROUP_MEM_FENCE; + } + + sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2]; + GROUP_MEM_FENCE; + } +#endif + } + + GROUP_LDS_BARRIER; + + *totalSum = sorterSharedMemory[wgSize*2-1]; + u32 addValue = sorterSharedMemory[lIdx+wgSize-1]; + return addValue; +} + +//__attribute__((reqd_work_group_size(128,1,1))) +uint4 localPrefixSum128V( 
uint4 pData, uint lIdx, uint* totalSum, __local u32 sorterSharedMemory[] ) +{ + u32 s4 = prefixScanVectorEx( &pData ); + u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 128 ); + return pData + make_uint4( rank, rank, rank, rank ); +} + + +//__attribute__((reqd_work_group_size(64,1,1))) +uint4 localPrefixSum64V( uint4 pData, uint lIdx, uint* totalSum, __local u32 sorterSharedMemory[] ) +{ + u32 s4 = prefixScanVectorEx( &pData ); + u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 64 ); + return pData + make_uint4( rank, rank, rank, rank ); +} + +u32 unpack4Key( u32 key, int keyIdx ){ return (key>>(keyIdx*8)) & 0xff;} + +u32 bit8Scan(u32 v) +{ + return (v<<8) + (v<<16) + (v<<24); +} + +//=== + + + + +#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx] + + +__kernel +__attribute__((reqd_work_group_size(WG_SIZE,1,1))) +void StreamCountKernel( __global u32* gSrc, __global u32* histogramOut, ConstBuffer cb ) +{ + __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE]; + + u32 gIdx = GET_GLOBAL_IDX; + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + u32 wgSize = GET_GROUP_SIZE; + const int startBit = cb.m_startBit; + const int n = cb.m_n; + const int nWGs = cb.m_nWGs; + const int nBlocksPerWG = cb.m_nBlocksPerWG; + + for(int i=0; i>startBit) & 0xf; +#if defined(NV_GPU) + MY_HISTOGRAM( localKey )++; +#else + AtomInc( MY_HISTOGRAM( localKey ) ); +#endif + } + } + } + + GROUP_LDS_BARRIER; + + if( lIdx < NUM_BUCKET ) + { + u32 sum = 0; + for(int i=0; i>startBit) & 0xf; +#if defined(NV_GPU) + MY_HISTOGRAM( localKey )++; +#else + AtomInc( MY_HISTOGRAM( localKey ) ); +#endif + } + } + } + + GROUP_LDS_BARRIER; + + if( lIdx < NUM_BUCKET ) + { + u32 sum = 0; + for(int i=0; i>startBit) & mask, (sortData[1]>>startBit) & mask, (sortData[2]>>startBit) & mask, (sortData[3]>>startBit) & mask ); + uint4 prefixSum = SELECT_UINT4( make_uint4(1,1,1,1), make_uint4(0,0,0,0), cmpResult != make_uint4(0,0,0,0) ); + u32 total; + prefixSum = 
localPrefixSum64V( prefixSum, lIdx, &total, ldsSortData ); + { + uint4 localAddr = make_uint4(lIdx*4+0,lIdx*4+1,lIdx*4+2,lIdx*4+3); + uint4 dstAddr = localAddr - prefixSum + make_uint4( total, total, total, total ); + dstAddr = SELECT_UINT4( prefixSum, dstAddr, cmpResult != make_uint4(0, 0, 0, 0) ); + + GROUP_LDS_BARRIER; + + ldsSortData[dstAddr.x] = sortData[0]; + ldsSortData[dstAddr.y] = sortData[1]; + ldsSortData[dstAddr.z] = sortData[2]; + ldsSortData[dstAddr.w] = sortData[3]; + + GROUP_LDS_BARRIER; + + sortData[0] = ldsSortData[localAddr.x]; + sortData[1] = ldsSortData[localAddr.y]; + sortData[2] = ldsSortData[localAddr.z]; + sortData[3] = ldsSortData[localAddr.w]; + + GROUP_LDS_BARRIER; + } + } +} + +// 2 scan, 2 exchange +void sort4Bits1(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData) +{ + for(uint ibit=0; ibit>(startBit+ibit)) & 0x3, + (sortData[1]>>(startBit+ibit)) & 0x3, + (sortData[2]>>(startBit+ibit)) & 0x3, + (sortData[3]>>(startBit+ibit)) & 0x3); + + u32 key4; + u32 sKeyPacked[4] = { 0, 0, 0, 0 }; + { + sKeyPacked[0] |= 1<<(8*b.x); + sKeyPacked[1] |= 1<<(8*b.y); + sKeyPacked[2] |= 1<<(8*b.z); + sKeyPacked[3] |= 1<<(8*b.w); + + key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; + } + + u32 rankPacked; + u32 sumPacked; + { + rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE ); + } + + GROUP_LDS_BARRIER; + + u32 newOffset[4] = { 0,0,0,0 }; + { + u32 sumScanned = bit8Scan( sumPacked ); + + u32 scannedKeys[4]; + scannedKeys[0] = 1<<(8*b.x); + scannedKeys[1] = 1<<(8*b.y); + scannedKeys[2] = 1<<(8*b.z); + scannedKeys[3] = 1<<(8*b.w); + { // 4 scans at once + u32 sum4 = 0; + for(int ie=0; ie<4; ie++) + { + u32 tmp = scannedKeys[ie]; + scannedKeys[ie] = sum4; + sum4 += tmp; + } + } + + { + u32 sumPlusRank = sumScanned + rankPacked; + { u32 ie = b.x; + scannedKeys[0] += sumPlusRank; + newOffset[0] = unpack4Key( scannedKeys[0], ie ); + } + { u32 ie = b.y; + scannedKeys[1] += sumPlusRank; + 
newOffset[1] = unpack4Key( scannedKeys[1], ie ); + } + { u32 ie = b.z; + scannedKeys[2] += sumPlusRank; + newOffset[2] = unpack4Key( scannedKeys[2], ie ); + } + { u32 ie = b.w; + scannedKeys[3] += sumPlusRank; + newOffset[3] = unpack4Key( scannedKeys[3], ie ); + } + } + } + + + GROUP_LDS_BARRIER; + + { + ldsSortData[newOffset[0]] = sortData[0]; + ldsSortData[newOffset[1]] = sortData[1]; + ldsSortData[newOffset[2]] = sortData[2]; + ldsSortData[newOffset[3]] = sortData[3]; + + GROUP_LDS_BARRIER; + + u32 dstAddr = 4*lIdx; + sortData[0] = ldsSortData[dstAddr+0]; + sortData[1] = ldsSortData[dstAddr+1]; + sortData[2] = ldsSortData[dstAddr+2]; + sortData[3] = ldsSortData[dstAddr+3]; + + GROUP_LDS_BARRIER; + } + } +} + +#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key] + +__kernel +__attribute__((reqd_work_group_size(WG_SIZE,1,1))) +void SortAndScatterKernel( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, ConstBuffer cb ) +{ + __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; + __local u32 localHistogramToCarry[NUM_BUCKET]; + __local u32 localHistogram[NUM_BUCKET*2]; + + u32 gIdx = GET_GLOBAL_IDX; + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + u32 wgSize = GET_GROUP_SIZE; + + const int n = cb.m_n; + const int nWGs = cb.m_nWGs; + const int startBit = cb.m_startBit; + const int nBlocksPerWG = cb.m_nBlocksPerWG; + + if( lIdx < (NUM_BUCKET) ) + { + localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx]; + } + + GROUP_LDS_BARRIER; + + const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; + + int nBlocks = n/blockSize - nBlocksPerWG*wgIdx; + + int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; + + for(int iblock=0; iblock>startBit) & 0xf; + + { // create histogram + u32 setIdx = lIdx/16; + if( lIdx < NUM_BUCKET ) + { + localHistogram[lIdx] = 0; + } + ldsSortData[lIdx] = 0; + GROUP_LDS_BARRIER; + + for(int i=0; i>(startBit+ibit)) & 0x3, + 
(sortData[1]>>(startBit+ibit)) & 0x3, + (sortData[2]>>(startBit+ibit)) & 0x3, + (sortData[3]>>(startBit+ibit)) & 0x3); + + u32 key4; + u32 sKeyPacked[4] = { 0, 0, 0, 0 }; + { + sKeyPacked[0] |= 1<<(8*b.x); + sKeyPacked[1] |= 1<<(8*b.y); + sKeyPacked[2] |= 1<<(8*b.z); + sKeyPacked[3] |= 1<<(8*b.w); + + key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; + } + + u32 rankPacked; + u32 sumPacked; + { + rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE ); + } + + GROUP_LDS_BARRIER; + + u32 newOffset[4] = { 0,0,0,0 }; + { + u32 sumScanned = bit8Scan( sumPacked ); + + u32 scannedKeys[4]; + scannedKeys[0] = 1<<(8*b.x); + scannedKeys[1] = 1<<(8*b.y); + scannedKeys[2] = 1<<(8*b.z); + scannedKeys[3] = 1<<(8*b.w); + { // 4 scans at once + u32 sum4 = 0; + for(int ie=0; ie<4; ie++) + { + u32 tmp = scannedKeys[ie]; + scannedKeys[ie] = sum4; + sum4 += tmp; + } + } + + { + u32 sumPlusRank = sumScanned + rankPacked; + { u32 ie = b.x; + scannedKeys[0] += sumPlusRank; + newOffset[0] = unpack4Key( scannedKeys[0], ie ); + } + { u32 ie = b.y; + scannedKeys[1] += sumPlusRank; + newOffset[1] = unpack4Key( scannedKeys[1], ie ); + } + { u32 ie = b.z; + scannedKeys[2] += sumPlusRank; + newOffset[2] = unpack4Key( scannedKeys[2], ie ); + } + { u32 ie = b.w; + scannedKeys[3] += sumPlusRank; + newOffset[3] = unpack4Key( scannedKeys[3], ie ); + } + } + } + + + GROUP_LDS_BARRIER; + + { + ldsSortData[newOffset[0]] = sortData[0]; + ldsSortData[newOffset[1]] = sortData[1]; + ldsSortData[newOffset[2]] = sortData[2]; + ldsSortData[newOffset[3]] = sortData[3]; + + ldsSortVal[newOffset[0]] = sortVal[0]; + ldsSortVal[newOffset[1]] = sortVal[1]; + ldsSortVal[newOffset[2]] = sortVal[2]; + ldsSortVal[newOffset[3]] = sortVal[3]; + + GROUP_LDS_BARRIER; + + u32 dstAddr = 4*lIdx; + sortData[0] = ldsSortData[dstAddr+0]; + sortData[1] = ldsSortData[dstAddr+1]; + sortData[2] = ldsSortData[dstAddr+2]; + sortData[3] = ldsSortData[dstAddr+3]; + + sortVal[0] = 
ldsSortVal[dstAddr+0]; + sortVal[1] = ldsSortVal[dstAddr+1]; + sortVal[2] = ldsSortVal[dstAddr+2]; + sortVal[3] = ldsSortVal[dstAddr+3]; + + GROUP_LDS_BARRIER; + } + } +} + + + +__kernel +__attribute__((reqd_work_group_size(WG_SIZE,1,1))) +void SortAndScatterKeyValueKernel( __global const u32* restrict gSrc, __global const int* restrict gSrcVal, __global const u32* rHistogram, __global u32* restrict gDst, __global int* restrict gDstVal, ConstBuffer cb) +{ + __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; + __local int ldsSortVal[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; + __local u32 localHistogramToCarry[NUM_BUCKET]; + __local u32 localHistogram[NUM_BUCKET*2]; + + u32 gIdx = GET_GLOBAL_IDX; + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + u32 wgSize = GET_GROUP_SIZE; + + const int n = cb.m_n; + const int nWGs = cb.m_nWGs; + const int startBit = cb.m_startBit; + const int nBlocksPerWG = cb.m_nBlocksPerWG; + + if( lIdx < (NUM_BUCKET) ) + { + localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx]; + } + + GROUP_LDS_BARRIER; + + const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; + + int nBlocks = n/blockSize - nBlocksPerWG*wgIdx; + + int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; + + for(int iblock=0; iblock>startBit) & 0xf; + + { // create histogram + u32 setIdx = lIdx/16; + if( lIdx < NUM_BUCKET ) + { + localHistogram[lIdx] = 0; + } + ldsSortData[lIdx] = 0; + GROUP_LDS_BARRIER; + + for(int i=0; i>startBit) & 0xf; + + { // create histogram + u32 setIdx = lIdx/16; + if( lIdx < NUM_BUCKET ) + { + localHistogram[lIdx] = 0; + } + ldsSortData[lIdx] = 0; + GROUP_LDS_BARRIER; + + for(int i=0; i gSrc : register( t0 ); +StructuredBuffer gSrcVal : register( t1 ); +StructuredBuffer rHistogram : register( t1 ); +StructuredBuffer rHistogram2 : register( t2 ); +RWStructuredBuffer histogramOut : register( u0 ); +RWStructuredBuffer wHistogram1 : register( u0 ); +RWStructuredBuffer gDst : register( u0 ); +RWStructuredBuffer gDstVal : 
register( u1 ); + +groupshared u32 localHistogramMat[NUM_BUCKET*WG_SIZE]; +#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx] + + +[numthreads(WG_SIZE, 1, 1)] +void StreamCountKernel( DEFAULT_ARGS ) +{ + u32 gIdx = GET_GLOBAL_IDX; + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + u32 wgSize = GET_GROUP_SIZE; + const int startBit = m_startBit; + + const int n = m_n; + const int nWGs = m_nWGs; + const int nBlocksPerWG = m_nBlocksPerWG; + + for(int i=0; i>startBit) & 0xf; +#if defined(NV_GPU) + MY_HISTOGRAM( localKey )++; +#else + AtomInc( MY_HISTOGRAM( localKey ) ); +#endif + } + } + } + + GROUP_LDS_BARRIER; + + if( lIdx < NUM_BUCKET ) + { + u32 sum = 0; + for(int i=0; i 64 ) + { + ldsSortData[idx] += ldsSortData[idx-64]; + GROUP_MEM_FENCE; + } + + ldsSortData[idx-1] += ldsSortData[idx-2]; + GROUP_MEM_FENCE; + } +#else + if( lIdx < 64 ) + { + ldsSortData[idx] += ldsSortData[idx-1]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-2]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-4]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-8]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-16]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-32]; + GROUP_MEM_FENCE; + if( wgSize > 64 ) + { + ldsSortData[idx] += ldsSortData[idx-64]; + GROUP_MEM_FENCE; + } + + ldsSortData[idx-1] += ldsSortData[idx-2]; + GROUP_MEM_FENCE; + } +#endif + } + + GROUP_LDS_BARRIER; + + totalSum = ldsSortData[wgSize*2-1]; + u32 addValue = ldsSortData[lIdx+wgSize-1]; + return addValue; +} + +//__attribute__((reqd_work_group_size(128,1,1))) +uint4 localPrefixSum128V( uint4 pData, uint lIdx, inout uint totalSum ) +{ + u32 s4 = prefixScanVectorEx( pData ); + u32 rank = localPrefixSum( s4, lIdx, totalSum, 128 ); + return pData + make_uint4( rank, rank, rank, rank ); +} + +//__attribute__((reqd_work_group_size(64,1,1))) +uint4 localPrefixSum64V( uint4 pData, uint lIdx, inout uint totalSum ) +{ + u32 s4 = prefixScanVectorEx( pData ); + u32 
rank = localPrefixSum( s4, lIdx, totalSum, 64 ); + return pData + make_uint4( rank, rank, rank, rank ); +} + + + + + +#define nPerLane (nPerWI/4) + +// NUM_BUCKET*nWGs < 128*nPerWI +[numthreads(128, 1, 1)] +void PrefixScanKernel( DEFAULT_ARGS ) +{ + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + const int nWGs = m_nWGs; + + u32 data[nPerWI]; + for(int i=0; i>(keyIdx*8)) & 0xff;} + +u32 bit8Scan(u32 v) +{ + return (v<<8) + (v<<16) + (v<<24); +} + + + + +void sort4Bits1(inout u32 sortData[4], int startBit, int lIdx) +{ +/* + for(uint ibit=0; ibit>(startBit+ibit)) & 0x3, + (sortData[1]>>(startBit+ibit)) & 0x3, + (sortData[2]>>(startBit+ibit)) & 0x3, + (sortData[3]>>(startBit+ibit)) & 0x3); + + u32 key4; + u32 sKeyPacked[4] = { 0, 0, 0, 0 }; + { + sKeyPacked[0] |= 1<<(8*b.x); + sKeyPacked[1] |= 1<<(8*b.y); + sKeyPacked[2] |= 1<<(8*b.z); + sKeyPacked[3] |= 1<<(8*b.w); + + key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; + } + + u32 rankPacked; + u32 sumPacked; + { + rankPacked = localPrefixSum64VSingle( key4, lIdx, sumPacked ); + } + +// GROUP_LDS_BARRIER; + + u32 sum[4] = { unpack4Key( sumPacked,0 ), unpack4Key( sumPacked,1 ), unpack4Key( sumPacked,2 ), unpack4Key( sumPacked,3 ) }; + + { + u32 sum4 = 0; + for(int ie=0; ie<4; ie++) + { + u32 tmp = sum[ie]; + sum[ie] = sum4; + sum4 += tmp; + } + } + + u32 newOffset[4] = { 0,0,0,0 }; + + for(int ie=0; ie<4; ie++) + { + uint4 key = extractKeys( b, ie ); + uint4 scannedKey = key; + prefixScanVectorEx( scannedKey ); + uint offset = sum[ie] + unpack4Key( rankPacked, ie ); + uint4 dstAddress = make_uint4( offset, offset, offset, offset ) + scannedKey; + + newOffset[0] += dstAddress.x*key.x; + newOffset[1] += dstAddress.y*key.y; + newOffset[2] += dstAddress.z*key.z; + newOffset[3] += dstAddress.w*key.w; + } + + + + { + ldsSortData[newOffset[0]] = sortData[0]; + ldsSortData[newOffset[1]] = sortData[1]; + ldsSortData[newOffset[2]] = sortData[2]; + ldsSortData[newOffset[3]] = sortData[3]; + +// 
GROUP_LDS_BARRIER; + + sortData[0] = ldsSortData[lIdx*4+0]; + sortData[1] = ldsSortData[lIdx*4+1]; + sortData[2] = ldsSortData[lIdx*4+2]; + sortData[3] = ldsSortData[lIdx*4+3]; + +// GROUP_LDS_BARRIER; + } + } +*/ + for(uint ibit=0; ibit>(startBit+ibit)) & 0x3, + (sortData[1]>>(startBit+ibit)) & 0x3, + (sortData[2]>>(startBit+ibit)) & 0x3, + (sortData[3]>>(startBit+ibit)) & 0x3); + + u32 key4; + u32 sKeyPacked[4] = { 0, 0, 0, 0 }; + { + sKeyPacked[0] |= 1<<(8*b.x); + sKeyPacked[1] |= 1<<(8*b.y); + sKeyPacked[2] |= 1<<(8*b.z); + sKeyPacked[3] |= 1<<(8*b.w); + + key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; + } + + u32 rankPacked; + u32 sumPacked; + { + rankPacked = localPrefixSum( key4, lIdx, sumPacked, WG_SIZE ); + } + + GROUP_LDS_BARRIER; + + u32 newOffset[4] = { 0,0,0,0 }; + { + u32 sumScanned = bit8Scan( sumPacked ); + + u32 scannedKeys[4]; + scannedKeys[0] = 1<<(8*b.x); + scannedKeys[1] = 1<<(8*b.y); + scannedKeys[2] = 1<<(8*b.z); + scannedKeys[3] = 1<<(8*b.w); + { // 4 scans at once + u32 sum4 = 0; + for(int ie=0; ie<4; ie++) + { + u32 tmp = scannedKeys[ie]; + scannedKeys[ie] = sum4; + sum4 += tmp; + } + } + + { + u32 sumPlusRank = sumScanned + rankPacked; + { u32 ie = b.x; + scannedKeys[0] += sumPlusRank; + newOffset[0] = unpack4Key( scannedKeys[0], ie ); + } + { u32 ie = b.y; + scannedKeys[1] += sumPlusRank; + newOffset[1] = unpack4Key( scannedKeys[1], ie ); + } + { u32 ie = b.z; + scannedKeys[2] += sumPlusRank; + newOffset[2] = unpack4Key( scannedKeys[2], ie ); + } + { u32 ie = b.w; + scannedKeys[3] += sumPlusRank; + newOffset[3] = unpack4Key( scannedKeys[3], ie ); + } + } + } + + + GROUP_LDS_BARRIER; + + { + ldsSortData[newOffset[0]] = sortData[0]; + ldsSortData[newOffset[1]] = sortData[1]; + ldsSortData[newOffset[2]] = sortData[2]; + ldsSortData[newOffset[3]] = sortData[3]; + + GROUP_LDS_BARRIER; + + u32 dstAddr = 4*lIdx; + sortData[0] = ldsSortData[dstAddr+0]; + sortData[1] = ldsSortData[dstAddr+1]; + sortData[2] = 
ldsSortData[dstAddr+2]; + sortData[3] = ldsSortData[dstAddr+3]; + + GROUP_LDS_BARRIER; + } + } +} + + +groupshared u32 localHistogramToCarry[NUM_BUCKET]; +groupshared u32 localHistogram[NUM_BUCKET*2]; +#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key] + + +[numthreads(WG_SIZE, 1, 1)] +void SortAndScatterKernel( DEFAULT_ARGS ) +{ + u32 gIdx = GET_GLOBAL_IDX; + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + u32 wgSize = GET_GROUP_SIZE; + + const int n = m_n; + const int nWGs = m_nWGs; + const int startBit = m_startBit; + const int nBlocksPerWG = m_nBlocksPerWG; + + if( lIdx < (NUM_BUCKET) ) + { + localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx]; + } + + GROUP_LDS_BARRIER; + + const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; + + int nBlocks = n/blockSize - nBlocksPerWG*wgIdx; + + int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; + + for(int iblock=0; iblock>startBit) & 0xf; + + { // create histogram + u32 setIdx = lIdx/16; + if( lIdx < NUM_BUCKET ) + { + localHistogram[lIdx] = 0; + } + ldsSortData[lIdx] = 0; + GROUP_LDS_BARRIER; + + for(int i=0; i>startBit) & 0xf; + + { // create histogram + if( lIdx < NUM_BUCKET ) + { + localHistogram[lIdx] = 0; + localHistogram[NUM_BUCKET+lIdx] = 0; + } +// GROUP_LDS_BARRIER; + + AtomInc( localHistogram[NUM_BUCKET+keys[0]] ); + AtomInc( localHistogram[NUM_BUCKET+keys[1]] ); + AtomInc( localHistogram[NUM_BUCKET+keys[2]] ); + AtomInc( localHistogram[NUM_BUCKET+keys[3]] ); + +// GROUP_LDS_BARRIER; + + uint hIdx = NUM_BUCKET+lIdx; + if( lIdx < NUM_BUCKET ) + { + myHistogram = localHistogram[hIdx]; + } +// GROUP_LDS_BARRIER; + +#if defined(USE_2LEVEL_REDUCE) + if( lIdx < NUM_BUCKET ) + { + localHistogram[hIdx] = localHistogram[hIdx-1]; + GROUP_MEM_FENCE; + + u32 u0, u1, u2; + u0 = localHistogram[hIdx-3]; + u1 = localHistogram[hIdx-2]; + u2 = localHistogram[hIdx-1]; + AtomAdd( localHistogram[hIdx], u0 + u1 + u2 ); + GROUP_MEM_FENCE; + u0 = localHistogram[hIdx-12]; + u1 = 
localHistogram[hIdx-8]; + u2 = localHistogram[hIdx-4]; + AtomAdd( localHistogram[hIdx], u0 + u1 + u2 ); + GROUP_MEM_FENCE; + } +#else + if( lIdx < NUM_BUCKET ) + { + localHistogram[hIdx] = localHistogram[hIdx-1]; + GROUP_MEM_FENCE; + localHistogram[hIdx] += localHistogram[hIdx-1]; + GROUP_MEM_FENCE; + localHistogram[hIdx] += localHistogram[hIdx-2]; + GROUP_MEM_FENCE; + localHistogram[hIdx] += localHistogram[hIdx-4]; + GROUP_MEM_FENCE; + localHistogram[hIdx] += localHistogram[hIdx-8]; + GROUP_MEM_FENCE; + } +#endif + +// GROUP_LDS_BARRIER; + } + + { + for(int ie=0; ie>(startBit+ibit)) & 0x3, + (sortData[1]>>(startBit+ibit)) & 0x3, + (sortData[2]>>(startBit+ibit)) & 0x3, + (sortData[3]>>(startBit+ibit)) & 0x3); + + u32 key4; + u32 sKeyPacked[4] = { 0, 0, 0, 0 }; + { + sKeyPacked[0] |= 1<<(8*b.x); + sKeyPacked[1] |= 1<<(8*b.y); + sKeyPacked[2] |= 1<<(8*b.z); + sKeyPacked[3] |= 1<<(8*b.w); + + key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; + } + + u32 rankPacked; + u32 sumPacked; + { + rankPacked = localPrefixSum( key4, lIdx, sumPacked, WG_SIZE ); + } + + GROUP_LDS_BARRIER; + + u32 newOffset[4] = { 0,0,0,0 }; + { + u32 sumScanned = bit8Scan( sumPacked ); + + u32 scannedKeys[4]; + scannedKeys[0] = 1<<(8*b.x); + scannedKeys[1] = 1<<(8*b.y); + scannedKeys[2] = 1<<(8*b.z); + scannedKeys[3] = 1<<(8*b.w); + { // 4 scans at once + u32 sum4 = 0; + for(int ie=0; ie<4; ie++) + { + u32 tmp = scannedKeys[ie]; + scannedKeys[ie] = sum4; + sum4 += tmp; + } + } + + { + u32 sumPlusRank = sumScanned + rankPacked; + { u32 ie = b.x; + scannedKeys[0] += sumPlusRank; + newOffset[0] = unpack4Key( scannedKeys[0], ie ); + } + { u32 ie = b.y; + scannedKeys[1] += sumPlusRank; + newOffset[1] = unpack4Key( scannedKeys[1], ie ); + } + { u32 ie = b.z; + scannedKeys[2] += sumPlusRank; + newOffset[2] = unpack4Key( scannedKeys[2], ie ); + } + { u32 ie = b.w; + scannedKeys[3] += sumPlusRank; + newOffset[3] = unpack4Key( scannedKeys[3], ie ); + } + } + } + + + GROUP_LDS_BARRIER; + 
+ { + ldsSortData[newOffset[0]] = sortData[0]; + ldsSortData[newOffset[1]] = sortData[1]; + ldsSortData[newOffset[2]] = sortData[2]; + ldsSortData[newOffset[3]] = sortData[3]; + + ldsSortVal[newOffset[0]] = sortVal[0]; + ldsSortVal[newOffset[1]] = sortVal[1]; + ldsSortVal[newOffset[2]] = sortVal[2]; + ldsSortVal[newOffset[3]] = sortVal[3]; + + GROUP_LDS_BARRIER; + + u32 dstAddr = 4*lIdx; + sortData[0] = ldsSortData[dstAddr+0]; + sortData[1] = ldsSortData[dstAddr+1]; + sortData[2] = ldsSortData[dstAddr+2]; + sortData[3] = ldsSortData[dstAddr+3]; + + sortVal[0] = ldsSortVal[dstAddr+0]; + sortVal[1] = ldsSortVal[dstAddr+1]; + sortVal[2] = ldsSortVal[dstAddr+2]; + sortVal[3] = ldsSortVal[dstAddr+3]; + + GROUP_LDS_BARRIER; + } + } +} + + + +[numthreads(WG_SIZE, 1, 1)] +void SortAndScatterKeyValueKernel( DEFAULT_ARGS ) +{ + u32 gIdx = GET_GLOBAL_IDX; + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + u32 wgSize = GET_GROUP_SIZE; + + const int n = m_n; + const int nWGs = m_nWGs; + const int startBit = m_startBit; + const int nBlocksPerWG = m_nBlocksPerWG; + + if( lIdx < (NUM_BUCKET) ) + { + localHistogramToCarry[lIdx] = rHistogram2[lIdx*nWGs + wgIdx]; + } + + GROUP_LDS_BARRIER; + + const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; + + int nBlocks = n/blockSize - nBlocksPerWG*wgIdx; + + int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; + + for(int iblock=0; iblock>startBit) & 0xf; + + { // create histogram + u32 setIdx = lIdx/16; + if( lIdx < NUM_BUCKET ) + { + localHistogram[lIdx] = 0; + } + ldsSortData[lIdx] = 0; + GROUP_LDS_BARRIER; + + for(int i=0; i 64 )\n" +" {\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-64];\n" +" GROUP_MEM_FENCE;\n" +" }\n" +"\n" +" sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n" +" GROUP_MEM_FENCE;\n" +" }\n" +"#else\n" +" if( lIdx < 64 )\n" +" {\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-1];\n" +" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-2]; \n" 
+" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-4];\n" +" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-8];\n" +" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-16];\n" +" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-32];\n" +" GROUP_MEM_FENCE;\n" +" if( wgSize > 64 )\n" +" {\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-64];\n" +" GROUP_MEM_FENCE;\n" +" }\n" +"\n" +" sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n" +" GROUP_MEM_FENCE;\n" +" }\n" +"#endif\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" *totalSum = sorterSharedMemory[wgSize*2-1];\n" +" u32 addValue = sorterSharedMemory[lIdx+wgSize-1];\n" +" return addValue;\n" +"}\n" +"\n" +"//__attribute__((reqd_work_group_size(128,1,1)))\n" +"uint4 localPrefixSum128V( uint4 pData, uint lIdx, uint* totalSum, __local u32 sorterSharedMemory[] )\n" +"{\n" +" u32 s4 = prefixScanVectorEx( &pData );\n" +" u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 128 );\n" +" return pData + make_uint4( rank, rank, rank, rank );\n" +"}\n" +"\n" +"\n" +"//__attribute__((reqd_work_group_size(64,1,1)))\n" +"uint4 localPrefixSum64V( uint4 pData, uint lIdx, uint* totalSum, __local u32 sorterSharedMemory[] )\n" +"{\n" +" u32 s4 = prefixScanVectorEx( &pData );\n" +" u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 64 );\n" +" return pData + make_uint4( rank, rank, rank, rank );\n" +"}\n" +"\n" +"u32 unpack4Key( u32 key, int keyIdx ){ return (key>>(keyIdx*8)) & 0xff;}\n" +"\n" +"u32 bit8Scan(u32 v)\n" +"{\n" +" return (v<<8) + (v<<16) + (v<<24);\n" +"}\n" +"\n" +"//===\n" +"\n" +"\n" +"\n" +"\n" +"#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx]\n" +"\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" +"void StreamCountKernel( __global u32* gSrc, __global u32* histogramOut, ConstBuffer cb )\n" +"{\n" +" __local u32 
localHistogramMat[NUM_BUCKET*WG_SIZE];\n" +"\n" +" u32 gIdx = GET_GLOBAL_IDX;\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +" u32 wgSize = GET_GROUP_SIZE;\n" +" const int startBit = cb.m_startBit;\n" +" const int n = cb.m_n;\n" +" const int nWGs = cb.m_nWGs;\n" +" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" +"\n" +" for(int i=0; i>startBit) & 0xf;\n" +"#if defined(NV_GPU)\n" +" MY_HISTOGRAM( localKey )++;\n" +"#else\n" +" AtomInc( MY_HISTOGRAM( localKey ) );\n" +"#endif\n" +" }\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" u32 sum = 0;\n" +" for(int i=0; i>startBit) & 0xf;\n" +"#if defined(NV_GPU)\n" +" MY_HISTOGRAM( localKey )++;\n" +"#else\n" +" AtomInc( MY_HISTOGRAM( localKey ) );\n" +"#endif\n" +" }\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" u32 sum = 0;\n" +" for(int i=0; i>startBit) & mask, (sortData[1]>>startBit) & mask, (sortData[2]>>startBit) & mask, (sortData[3]>>startBit) & mask );\n" +" uint4 prefixSum = SELECT_UINT4( make_uint4(1,1,1,1), make_uint4(0,0,0,0), cmpResult != make_uint4(0,0,0,0) );\n" +" u32 total;\n" +" prefixSum = localPrefixSum64V( prefixSum, lIdx, &total, ldsSortData );\n" +" {\n" +" uint4 localAddr = make_uint4(lIdx*4+0,lIdx*4+1,lIdx*4+2,lIdx*4+3);\n" +" uint4 dstAddr = localAddr - prefixSum + make_uint4( total, total, total, total );\n" +" dstAddr = SELECT_UINT4( prefixSum, dstAddr, cmpResult != make_uint4(0, 0, 0, 0) );\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" ldsSortData[dstAddr.x] = sortData[0];\n" +" ldsSortData[dstAddr.y] = sortData[1];\n" +" ldsSortData[dstAddr.z] = sortData[2];\n" +" ldsSortData[dstAddr.w] = sortData[3];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" sortData[0] = ldsSortData[localAddr.x];\n" +" sortData[1] = ldsSortData[localAddr.y];\n" +" sortData[2] = ldsSortData[localAddr.z];\n" +" sortData[3] = ldsSortData[localAddr.w];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +" }\n" +"}\n" 
+"\n" +"// 2 scan, 2 exchange\n" +"void sort4Bits1(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData)\n" +"{\n" +" for(uint ibit=0; ibit>(startBit+ibit)) & 0x3, \n" +" (sortData[1]>>(startBit+ibit)) & 0x3, \n" +" (sortData[2]>>(startBit+ibit)) & 0x3, \n" +" (sortData[3]>>(startBit+ibit)) & 0x3);\n" +"\n" +" u32 key4;\n" +" u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" +" {\n" +" sKeyPacked[0] |= 1<<(8*b.x);\n" +" sKeyPacked[1] |= 1<<(8*b.y);\n" +" sKeyPacked[2] |= 1<<(8*b.z);\n" +" sKeyPacked[3] |= 1<<(8*b.w);\n" +"\n" +" key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" +" }\n" +"\n" +" u32 rankPacked;\n" +" u32 sumPacked;\n" +" {\n" +" rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" u32 newOffset[4] = { 0,0,0,0 };\n" +" {\n" +" u32 sumScanned = bit8Scan( sumPacked );\n" +"\n" +" u32 scannedKeys[4];\n" +" scannedKeys[0] = 1<<(8*b.x);\n" +" scannedKeys[1] = 1<<(8*b.y);\n" +" scannedKeys[2] = 1<<(8*b.z);\n" +" scannedKeys[3] = 1<<(8*b.w);\n" +" { // 4 scans at once\n" +" u32 sum4 = 0;\n" +" for(int ie=0; ie<4; ie++)\n" +" {\n" +" u32 tmp = scannedKeys[ie];\n" +" scannedKeys[ie] = sum4;\n" +" sum4 += tmp;\n" +" }\n" +" }\n" +"\n" +" {\n" +" u32 sumPlusRank = sumScanned + rankPacked;\n" +" { u32 ie = b.x;\n" +" scannedKeys[0] += sumPlusRank;\n" +" newOffset[0] = unpack4Key( scannedKeys[0], ie );\n" +" }\n" +" { u32 ie = b.y;\n" +" scannedKeys[1] += sumPlusRank;\n" +" newOffset[1] = unpack4Key( scannedKeys[1], ie );\n" +" }\n" +" { u32 ie = b.z;\n" +" scannedKeys[2] += sumPlusRank;\n" +" newOffset[2] = unpack4Key( scannedKeys[2], ie );\n" +" }\n" +" { u32 ie = b.w;\n" +" scannedKeys[3] += sumPlusRank;\n" +" newOffset[3] = unpack4Key( scannedKeys[3], ie );\n" +" }\n" +" }\n" +" }\n" +"\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" {\n" +" ldsSortData[newOffset[0]] = sortData[0];\n" +" ldsSortData[newOffset[1]] = sortData[1];\n" +" ldsSortData[newOffset[2]] = 
sortData[2];\n" +" ldsSortData[newOffset[3]] = sortData[3];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" u32 dstAddr = 4*lIdx;\n" +" sortData[0] = ldsSortData[dstAddr+0];\n" +" sortData[1] = ldsSortData[dstAddr+1];\n" +" sortData[2] = ldsSortData[dstAddr+2];\n" +" sortData[3] = ldsSortData[dstAddr+3];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +" }\n" +"}\n" +"\n" +"#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key]\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" +"void SortAndScatterKernel( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, ConstBuffer cb )\n" +"{\n" +" __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" +" __local u32 localHistogramToCarry[NUM_BUCKET];\n" +" __local u32 localHistogram[NUM_BUCKET*2];\n" +"\n" +" u32 gIdx = GET_GLOBAL_IDX;\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +" u32 wgSize = GET_GROUP_SIZE;\n" +"\n" +" const int n = cb.m_n;\n" +" const int nWGs = cb.m_nWGs;\n" +" const int startBit = cb.m_startBit;\n" +" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" +"\n" +" if( lIdx < (NUM_BUCKET) )\n" +" {\n" +" localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" +"\n" +" int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" +"\n" +" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" +"\n" +" for(int iblock=0; iblock>startBit) & 0xf;\n" +"\n" +" { // create histogram\n" +" u32 setIdx = lIdx/16;\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogram[lIdx] = 0;\n" +" }\n" +" ldsSortData[lIdx] = 0;\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" for(int i=0; i>(startBit+ibit)) & 0x3, \n" +" (sortData[1]>>(startBit+ibit)) & 0x3, \n" +" (sortData[2]>>(startBit+ibit)) & 0x3, \n" +" (sortData[3]>>(startBit+ibit)) & 0x3);\n" +"\n" +" u32 key4;\n" +" u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" +" 
{\n" +" sKeyPacked[0] |= 1<<(8*b.x);\n" +" sKeyPacked[1] |= 1<<(8*b.y);\n" +" sKeyPacked[2] |= 1<<(8*b.z);\n" +" sKeyPacked[3] |= 1<<(8*b.w);\n" +"\n" +" key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" +" }\n" +"\n" +" u32 rankPacked;\n" +" u32 sumPacked;\n" +" {\n" +" rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" u32 newOffset[4] = { 0,0,0,0 };\n" +" {\n" +" u32 sumScanned = bit8Scan( sumPacked );\n" +"\n" +" u32 scannedKeys[4];\n" +" scannedKeys[0] = 1<<(8*b.x);\n" +" scannedKeys[1] = 1<<(8*b.y);\n" +" scannedKeys[2] = 1<<(8*b.z);\n" +" scannedKeys[3] = 1<<(8*b.w);\n" +" { // 4 scans at once\n" +" u32 sum4 = 0;\n" +" for(int ie=0; ie<4; ie++)\n" +" {\n" +" u32 tmp = scannedKeys[ie];\n" +" scannedKeys[ie] = sum4;\n" +" sum4 += tmp;\n" +" }\n" +" }\n" +"\n" +" {\n" +" u32 sumPlusRank = sumScanned + rankPacked;\n" +" { u32 ie = b.x;\n" +" scannedKeys[0] += sumPlusRank;\n" +" newOffset[0] = unpack4Key( scannedKeys[0], ie );\n" +" }\n" +" { u32 ie = b.y;\n" +" scannedKeys[1] += sumPlusRank;\n" +" newOffset[1] = unpack4Key( scannedKeys[1], ie );\n" +" }\n" +" { u32 ie = b.z;\n" +" scannedKeys[2] += sumPlusRank;\n" +" newOffset[2] = unpack4Key( scannedKeys[2], ie );\n" +" }\n" +" { u32 ie = b.w;\n" +" scannedKeys[3] += sumPlusRank;\n" +" newOffset[3] = unpack4Key( scannedKeys[3], ie );\n" +" }\n" +" }\n" +" }\n" +"\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" {\n" +" ldsSortData[newOffset[0]] = sortData[0];\n" +" ldsSortData[newOffset[1]] = sortData[1];\n" +" ldsSortData[newOffset[2]] = sortData[2];\n" +" ldsSortData[newOffset[3]] = sortData[3];\n" +"\n" +" ldsSortVal[newOffset[0]] = sortVal[0];\n" +" ldsSortVal[newOffset[1]] = sortVal[1];\n" +" ldsSortVal[newOffset[2]] = sortVal[2];\n" +" ldsSortVal[newOffset[3]] = sortVal[3];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" u32 dstAddr = 4*lIdx;\n" +" sortData[0] = ldsSortData[dstAddr+0];\n" +" sortData[1] = 
ldsSortData[dstAddr+1];\n" +" sortData[2] = ldsSortData[dstAddr+2];\n" +" sortData[3] = ldsSortData[dstAddr+3];\n" +"\n" +" sortVal[0] = ldsSortVal[dstAddr+0];\n" +" sortVal[1] = ldsSortVal[dstAddr+1];\n" +" sortVal[2] = ldsSortVal[dstAddr+2];\n" +" sortVal[3] = ldsSortVal[dstAddr+3];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +" }\n" +"}\n" +"\n" +"\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" +"void SortAndScatterKeyValueKernel( __global const u32* restrict gSrc, __global const int* restrict gSrcVal, __global const u32* rHistogram, __global u32* restrict gDst, __global int* restrict gDstVal, ConstBuffer cb)\n" +"{\n" +" __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" +" __local int ldsSortVal[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" +" __local u32 localHistogramToCarry[NUM_BUCKET];\n" +" __local u32 localHistogram[NUM_BUCKET*2];\n" +"\n" +" u32 gIdx = GET_GLOBAL_IDX;\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +" u32 wgSize = GET_GROUP_SIZE;\n" +"\n" +" const int n = cb.m_n;\n" +" const int nWGs = cb.m_nWGs;\n" +" const int startBit = cb.m_startBit;\n" +" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" +"\n" +" if( lIdx < (NUM_BUCKET) )\n" +" {\n" +" localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" +"\n" +" int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" +"\n" +" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" +"\n" +" for(int iblock=0; iblock>startBit) & 0xf;\n" +"\n" +" { // create histogram\n" +" u32 setIdx = lIdx/16;\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogram[lIdx] = 0;\n" +" }\n" +" ldsSortData[lIdx] = 0;\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" for(int i=0; i>startBit) & 0xf;\n" +"\n" +" { // create histogram\n" +" u32 setIdx = lIdx/16;\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogram[lIdx] = 0;\n" +" }\n" +" 
ldsSortData[lIdx] = 0;\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" for(int i=0; i gSrc : register( t0 );\n" +"StructuredBuffer gSrcVal : register( t1 );\n" +"StructuredBuffer rHistogram : register( t1 );\n" +"StructuredBuffer rHistogram2 : register( t2 );\n" +"RWStructuredBuffer histogramOut : register( u0 );\n" +"RWStructuredBuffer wHistogram1 : register( u0 );\n" +"RWStructuredBuffer gDst : register( u0 );\n" +"RWStructuredBuffer gDstVal : register( u1 );\n" +"\n" +"groupshared u32 localHistogramMat[NUM_BUCKET*WG_SIZE];\n" +"#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx]\n" +"\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void StreamCountKernel( DEFAULT_ARGS )\n" +"{\n" +" u32 gIdx = GET_GLOBAL_IDX;\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +" u32 wgSize = GET_GROUP_SIZE;\n" +" const int startBit = m_startBit;\n" +"\n" +" const int n = m_n;\n" +" const int nWGs = m_nWGs;\n" +" const int nBlocksPerWG = m_nBlocksPerWG;\n" +"\n" +" for(int i=0; i>startBit) & 0xf;\n" +"#if defined(NV_GPU)\n" +" MY_HISTOGRAM( localKey )++;\n" +"#else\n" +" AtomInc( MY_HISTOGRAM( localKey ) );\n" +"#endif\n" +" }\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" u32 sum = 0;\n" +" for(int i=0; i 64 )\n" +" {\n" +" ldsSortData[idx] += ldsSortData[idx-64];\n" +" GROUP_MEM_FENCE;\n" +" }\n" +"\n" +" ldsSortData[idx-1] += ldsSortData[idx-2];\n" +" GROUP_MEM_FENCE;\n" +" }\n" +"#else\n" +" if( lIdx < 64 )\n" +" {\n" +" ldsSortData[idx] += ldsSortData[idx-1];\n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-2]; \n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-4];\n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-8];\n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-16];\n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-32];\n" +" GROUP_MEM_FENCE;\n" +" if( wgSize > 64 )\n" +" {\n" +" ldsSortData[idx] += ldsSortData[idx-64];\n" +" 
GROUP_MEM_FENCE;\n" +" }\n" +"\n" +" ldsSortData[idx-1] += ldsSortData[idx-2];\n" +" GROUP_MEM_FENCE;\n" +" }\n" +"#endif\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" totalSum = ldsSortData[wgSize*2-1];\n" +" u32 addValue = ldsSortData[lIdx+wgSize-1];\n" +" return addValue;\n" +"}\n" +"\n" +"//__attribute__((reqd_work_group_size(128,1,1)))\n" +"uint4 localPrefixSum128V( uint4 pData, uint lIdx, inout uint totalSum )\n" +"{\n" +" u32 s4 = prefixScanVectorEx( pData );\n" +" u32 rank = localPrefixSum( s4, lIdx, totalSum, 128 );\n" +" return pData + make_uint4( rank, rank, rank, rank );\n" +"}\n" +"\n" +"//__attribute__((reqd_work_group_size(64,1,1)))\n" +"uint4 localPrefixSum64V( uint4 pData, uint lIdx, inout uint totalSum )\n" +"{\n" +" u32 s4 = prefixScanVectorEx( pData );\n" +" u32 rank = localPrefixSum( s4, lIdx, totalSum, 64 );\n" +" return pData + make_uint4( rank, rank, rank, rank );\n" +"}\n" +"\n" +"\n" +"\n" +"\n" +"\n" +"#define nPerLane (nPerWI/4)\n" +"\n" +"// NUM_BUCKET*nWGs < 128*nPerWI\n" +"[numthreads(128, 1, 1)]\n" +"void PrefixScanKernel( DEFAULT_ARGS )\n" +"{\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +" const int nWGs = m_nWGs;\n" +"\n" +" u32 data[nPerWI];\n" +" for(int i=0; i>(keyIdx*8)) & 0xff;}\n" +"\n" +"u32 bit8Scan(u32 v)\n" +"{\n" +" return (v<<8) + (v<<16) + (v<<24);\n" +"}\n" +"\n" +"\n" +"\n" +"\n" +"void sort4Bits1(inout u32 sortData[4], int startBit, int lIdx)\n" +"{\n" +"/*\n" +" for(uint ibit=0; ibit>(startBit+ibit)) & 0x3, \n" +" (sortData[1]>>(startBit+ibit)) & 0x3, \n" +" (sortData[2]>>(startBit+ibit)) & 0x3, \n" +" (sortData[3]>>(startBit+ibit)) & 0x3);\n" +"\n" +" u32 key4;\n" +" u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" +" {\n" +" sKeyPacked[0] |= 1<<(8*b.x);\n" +" sKeyPacked[1] |= 1<<(8*b.y);\n" +" sKeyPacked[2] |= 1<<(8*b.z);\n" +" sKeyPacked[3] |= 1<<(8*b.w);\n" +"\n" +" key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" +" }\n" +"\n" +" u32 rankPacked;\n" +" u32 
sumPacked;\n" +" {\n" +" rankPacked = localPrefixSum64VSingle( key4, lIdx, sumPacked );\n" +" }\n" +"\n" +"// GROUP_LDS_BARRIER;\n" +"\n" +" u32 sum[4] = { unpack4Key( sumPacked,0 ), unpack4Key( sumPacked,1 ), unpack4Key( sumPacked,2 ), unpack4Key( sumPacked,3 ) };\n" +"\n" +" {\n" +" u32 sum4 = 0;\n" +" for(int ie=0; ie<4; ie++)\n" +" {\n" +" u32 tmp = sum[ie];\n" +" sum[ie] = sum4;\n" +" sum4 += tmp;\n" +" }\n" +" }\n" +"\n" +" u32 newOffset[4] = { 0,0,0,0 };\n" +"\n" +" for(int ie=0; ie<4; ie++)\n" +" {\n" +" uint4 key = extractKeys( b, ie );\n" +" uint4 scannedKey = key;\n" +" prefixScanVectorEx( scannedKey );\n" +" uint offset = sum[ie] + unpack4Key( rankPacked, ie );\n" +" uint4 dstAddress = make_uint4( offset, offset, offset, offset ) + scannedKey;\n" +"\n" +" newOffset[0] += dstAddress.x*key.x;\n" +" newOffset[1] += dstAddress.y*key.y;\n" +" newOffset[2] += dstAddress.z*key.z;\n" +" newOffset[3] += dstAddress.w*key.w;\n" +" }\n" +"\n" +"\n" +"\n" +" {\n" +" ldsSortData[newOffset[0]] = sortData[0];\n" +" ldsSortData[newOffset[1]] = sortData[1];\n" +" ldsSortData[newOffset[2]] = sortData[2];\n" +" ldsSortData[newOffset[3]] = sortData[3];\n" +"\n" +"// GROUP_LDS_BARRIER;\n" +"\n" +" sortData[0] = ldsSortData[lIdx*4+0];\n" +" sortData[1] = ldsSortData[lIdx*4+1];\n" +" sortData[2] = ldsSortData[lIdx*4+2];\n" +" sortData[3] = ldsSortData[lIdx*4+3];\n" +"\n" +"// GROUP_LDS_BARRIER;\n" +" }\n" +" }\n" +"*/\n" +" for(uint ibit=0; ibit>(startBit+ibit)) & 0x3, \n" +" (sortData[1]>>(startBit+ibit)) & 0x3, \n" +" (sortData[2]>>(startBit+ibit)) & 0x3, \n" +" (sortData[3]>>(startBit+ibit)) & 0x3);\n" +"\n" +" u32 key4;\n" +" u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" +" {\n" +" sKeyPacked[0] |= 1<<(8*b.x);\n" +" sKeyPacked[1] |= 1<<(8*b.y);\n" +" sKeyPacked[2] |= 1<<(8*b.z);\n" +" sKeyPacked[3] |= 1<<(8*b.w);\n" +"\n" +" key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" +" }\n" +"\n" +" u32 rankPacked;\n" +" u32 sumPacked;\n" +" {\n" +" rankPacked = 
localPrefixSum( key4, lIdx, sumPacked, WG_SIZE );\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" u32 newOffset[4] = { 0,0,0,0 };\n" +" {\n" +" u32 sumScanned = bit8Scan( sumPacked );\n" +"\n" +" u32 scannedKeys[4];\n" +" scannedKeys[0] = 1<<(8*b.x);\n" +" scannedKeys[1] = 1<<(8*b.y);\n" +" scannedKeys[2] = 1<<(8*b.z);\n" +" scannedKeys[3] = 1<<(8*b.w);\n" +" { // 4 scans at once\n" +" u32 sum4 = 0;\n" +" for(int ie=0; ie<4; ie++)\n" +" {\n" +" u32 tmp = scannedKeys[ie];\n" +" scannedKeys[ie] = sum4;\n" +" sum4 += tmp;\n" +" }\n" +" }\n" +"\n" +" {\n" +" u32 sumPlusRank = sumScanned + rankPacked;\n" +" { u32 ie = b.x;\n" +" scannedKeys[0] += sumPlusRank;\n" +" newOffset[0] = unpack4Key( scannedKeys[0], ie );\n" +" }\n" +" { u32 ie = b.y;\n" +" scannedKeys[1] += sumPlusRank;\n" +" newOffset[1] = unpack4Key( scannedKeys[1], ie );\n" +" }\n" +" { u32 ie = b.z;\n" +" scannedKeys[2] += sumPlusRank;\n" +" newOffset[2] = unpack4Key( scannedKeys[2], ie );\n" +" }\n" +" { u32 ie = b.w;\n" +" scannedKeys[3] += sumPlusRank;\n" +" newOffset[3] = unpack4Key( scannedKeys[3], ie );\n" +" }\n" +" }\n" +" }\n" +"\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" {\n" +" ldsSortData[newOffset[0]] = sortData[0];\n" +" ldsSortData[newOffset[1]] = sortData[1];\n" +" ldsSortData[newOffset[2]] = sortData[2];\n" +" ldsSortData[newOffset[3]] = sortData[3];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" u32 dstAddr = 4*lIdx;\n" +" sortData[0] = ldsSortData[dstAddr+0];\n" +" sortData[1] = ldsSortData[dstAddr+1];\n" +" sortData[2] = ldsSortData[dstAddr+2];\n" +" sortData[3] = ldsSortData[dstAddr+3];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +" }\n" +"}\n" +"\n" +"\n" +"groupshared u32 localHistogramToCarry[NUM_BUCKET];\n" +"groupshared u32 localHistogram[NUM_BUCKET*2];\n" +"#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key]\n" +"\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void SortAndScatterKernel( DEFAULT_ARGS )\n" +"{\n" +" u32 gIdx = GET_GLOBAL_IDX;\n" +" u32 lIdx = 
GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +" u32 wgSize = GET_GROUP_SIZE;\n" +"\n" +" const int n = m_n;\n" +" const int nWGs = m_nWGs;\n" +" const int startBit = m_startBit;\n" +" const int nBlocksPerWG = m_nBlocksPerWG;\n" +"\n" +" if( lIdx < (NUM_BUCKET) )\n" +" {\n" +" localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" +"\n" +" int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" +"\n" +" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" +"\n" +" for(int iblock=0; iblock>startBit) & 0xf;\n" +"\n" +" { // create histogram\n" +" u32 setIdx = lIdx/16;\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogram[lIdx] = 0;\n" +" }\n" +" ldsSortData[lIdx] = 0;\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" for(int i=0; i>startBit) & 0xf;\n" +"\n" +" { // create histogram\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogram[lIdx] = 0;\n" +" localHistogram[NUM_BUCKET+lIdx] = 0;\n" +" }\n" +"// GROUP_LDS_BARRIER;\n" +"\n" +" AtomInc( localHistogram[NUM_BUCKET+keys[0]] );\n" +" AtomInc( localHistogram[NUM_BUCKET+keys[1]] );\n" +" AtomInc( localHistogram[NUM_BUCKET+keys[2]] );\n" +" AtomInc( localHistogram[NUM_BUCKET+keys[3]] );\n" +" \n" +"// GROUP_LDS_BARRIER;\n" +" \n" +" uint hIdx = NUM_BUCKET+lIdx;\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" myHistogram = localHistogram[hIdx];\n" +" }\n" +"// GROUP_LDS_BARRIER;\n" +"\n" +"#if defined(USE_2LEVEL_REDUCE)\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogram[hIdx] = localHistogram[hIdx-1];\n" +" GROUP_MEM_FENCE;\n" +"\n" +" u32 u0, u1, u2;\n" +" u0 = localHistogram[hIdx-3];\n" +" u1 = localHistogram[hIdx-2];\n" +" u2 = localHistogram[hIdx-1];\n" +" AtomAdd( localHistogram[hIdx], u0 + u1 + u2 );\n" +" GROUP_MEM_FENCE;\n" +" u0 = localHistogram[hIdx-12];\n" +" u1 = localHistogram[hIdx-8];\n" +" u2 = localHistogram[hIdx-4];\n" +" AtomAdd( localHistogram[hIdx], u0 + u1 + u2 
);\n" +" GROUP_MEM_FENCE;\n" +" }\n" +"#else\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogram[hIdx] = localHistogram[hIdx-1];\n" +" GROUP_MEM_FENCE;\n" +" localHistogram[hIdx] += localHistogram[hIdx-1];\n" +" GROUP_MEM_FENCE;\n" +" localHistogram[hIdx] += localHistogram[hIdx-2];\n" +" GROUP_MEM_FENCE;\n" +" localHistogram[hIdx] += localHistogram[hIdx-4];\n" +" GROUP_MEM_FENCE;\n" +" localHistogram[hIdx] += localHistogram[hIdx-8];\n" +" GROUP_MEM_FENCE;\n" +" }\n" +"#endif\n" +"\n" +"// GROUP_LDS_BARRIER;\n" +" }\n" +"\n" +" {\n" +" for(int ie=0; ie>(startBit+ibit)) & 0x3, \n" +" (sortData[1]>>(startBit+ibit)) & 0x3, \n" +" (sortData[2]>>(startBit+ibit)) & 0x3, \n" +" (sortData[3]>>(startBit+ibit)) & 0x3);\n" +"\n" +" u32 key4;\n" +" u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" +" {\n" +" sKeyPacked[0] |= 1<<(8*b.x);\n" +" sKeyPacked[1] |= 1<<(8*b.y);\n" +" sKeyPacked[2] |= 1<<(8*b.z);\n" +" sKeyPacked[3] |= 1<<(8*b.w);\n" +"\n" +" key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" +" }\n" +"\n" +" u32 rankPacked;\n" +" u32 sumPacked;\n" +" {\n" +" rankPacked = localPrefixSum( key4, lIdx, sumPacked, WG_SIZE );\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" u32 newOffset[4] = { 0,0,0,0 };\n" +" {\n" +" u32 sumScanned = bit8Scan( sumPacked );\n" +"\n" +" u32 scannedKeys[4];\n" +" scannedKeys[0] = 1<<(8*b.x);\n" +" scannedKeys[1] = 1<<(8*b.y);\n" +" scannedKeys[2] = 1<<(8*b.z);\n" +" scannedKeys[3] = 1<<(8*b.w);\n" +" { // 4 scans at once\n" +" u32 sum4 = 0;\n" +" for(int ie=0; ie<4; ie++)\n" +" {\n" +" u32 tmp = scannedKeys[ie];\n" +" scannedKeys[ie] = sum4;\n" +" sum4 += tmp;\n" +" }\n" +" }\n" +"\n" +" {\n" +" u32 sumPlusRank = sumScanned + rankPacked;\n" +" { u32 ie = b.x;\n" +" scannedKeys[0] += sumPlusRank;\n" +" newOffset[0] = unpack4Key( scannedKeys[0], ie );\n" +" }\n" +" { u32 ie = b.y;\n" +" scannedKeys[1] += sumPlusRank;\n" +" newOffset[1] = unpack4Key( scannedKeys[1], ie );\n" +" }\n" +" { u32 ie = b.z;\n" +" scannedKeys[2] += 
sumPlusRank;\n" +" newOffset[2] = unpack4Key( scannedKeys[2], ie );\n" +" }\n" +" { u32 ie = b.w;\n" +" scannedKeys[3] += sumPlusRank;\n" +" newOffset[3] = unpack4Key( scannedKeys[3], ie );\n" +" }\n" +" }\n" +" }\n" +"\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" {\n" +" ldsSortData[newOffset[0]] = sortData[0];\n" +" ldsSortData[newOffset[1]] = sortData[1];\n" +" ldsSortData[newOffset[2]] = sortData[2];\n" +" ldsSortData[newOffset[3]] = sortData[3];\n" +"\n" +" ldsSortVal[newOffset[0]] = sortVal[0];\n" +" ldsSortVal[newOffset[1]] = sortVal[1];\n" +" ldsSortVal[newOffset[2]] = sortVal[2];\n" +" ldsSortVal[newOffset[3]] = sortVal[3];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" u32 dstAddr = 4*lIdx;\n" +" sortData[0] = ldsSortData[dstAddr+0];\n" +" sortData[1] = ldsSortData[dstAddr+1];\n" +" sortData[2] = ldsSortData[dstAddr+2];\n" +" sortData[3] = ldsSortData[dstAddr+3];\n" +"\n" +" sortVal[0] = ldsSortVal[dstAddr+0];\n" +" sortVal[1] = ldsSortVal[dstAddr+1];\n" +" sortVal[2] = ldsSortVal[dstAddr+2];\n" +" sortVal[3] = ldsSortVal[dstAddr+3];\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +" }\n" +"}\n" +"\n" +"\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void SortAndScatterKeyValueKernel( DEFAULT_ARGS )\n" +"{\n" +" u32 gIdx = GET_GLOBAL_IDX;\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +" u32 wgSize = GET_GROUP_SIZE;\n" +"\n" +" const int n = m_n;\n" +" const int nWGs = m_nWGs;\n" +" const int startBit = m_startBit;\n" +" const int nBlocksPerWG = m_nBlocksPerWG;\n" +"\n" +" if( lIdx < (NUM_BUCKET) )\n" +" {\n" +" localHistogramToCarry[lIdx] = rHistogram2[lIdx*nWGs + wgIdx];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" +"\n" +" int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" +"\n" +" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" +"\n" +" for(int iblock=0; iblock>startBit) & 0xf;\n" +"\n" +" { // create histogram\n" +" u32 setIdx = lIdx/16;\n" +" if( lIdx < 
NUM_BUCKET )\n" +" {\n" +" localHistogram[lIdx] = 0;\n" +" }\n" +" ldsSortData[lIdx] = 0;\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" for(int i=0; i rHistogram : register(t0); + +RWStructuredBuffer dataToSort : register( u0 ); +RWStructuredBuffer dataToSortOut : register( u1 ); + + + +#define WG_SIZE 128 +#define ELEMENTS_PER_WORK_ITEM 4 +#define BITS_PER_PASS 4 +#define NUM_BUCKET (1<> targetKey; + key.y = (data.y & mask) >> targetKey; + key.z = (data.z & mask) >> targetKey; + key.w = (data.w & mask) >> targetKey; + return key; +} + +uint packKeys(uint lower, uint upper) +{ + return lower|(upper<<16); +} + +uint4 packKeys(uint4 lower, uint4 upper) +{ + return uint4( lower.x|(upper.x<<16), lower.y|(upper.y<<16), lower.z|(upper.z<<16), lower.w|(upper.w<<16) ); +} + +uint extractLower( uint data ) +{ + return data&0xffff; +} + +uint extractUpper( uint data ) +{ + return (data>>16)&0xffff; +} + +uint4 extractLower( uint4 data ) +{ + return uint4( data.x&0xffff, data.y&0xffff, data.z&0xffff, data.w&0xffff ); +} + +uint4 extractUpper( uint4 data ) +{ + return uint4( (data.x>>16)&0xffff, (data.y>>16)&0xffff, (data.z>>16)&0xffff, (data.w>>16)&0xffff ); +} + +[numthreads(WG_SIZE, 1, 1)] +void SortAndScatterKernel( DEFAULT_ARGS ) +{ + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + + if( lIdx < (NUM_BUCKET) ) + { + localHistogramToCarry[lIdx] = rHistogram[lIdx*m_nWorkGroupsToExecute + wgIdx]; + } + + GROUP_LDS_BARRIER; + + for(uint igroup=wgIdx*m_nBlocksPerGroup; igroup>m_startBit) & 0xf, (myData[1].key>>m_startBit) & 0xf, (myData[2].key>>m_startBit) & 0xf, (myData[3].key>>m_startBit) & 0xf); + for(uint targetKey=0; targetKey<(NUM_BUCKET); targetKey+=4) + { + uint4 key[4]; + uint keySet[2]; + { // pack 4 + uint4 scannedKey[4]; + key[0] = scannedKey[0] = extractKeys( b, targetKey+0 ); + key[1] = scannedKey[1] = extractKeys( b, targetKey+1 ); + key[2] = scannedKey[2] = extractKeys( b, targetKey+2 ); + key[3] = scannedKey[3] = extractKeys( b, targetKey+3 ); + { + uint s[4]; 
+ s[0] = prefixScanVectorEx( scannedKey[0] ); + s[1] = prefixScanVectorEx( scannedKey[1] ); + s[2] = prefixScanVectorEx( scannedKey[2] ); + s[3] = prefixScanVectorEx( scannedKey[3] ); + keySet[0] = packKeys( s[0], s[1] ); + keySet[1] = packKeys( s[2], s[3] ); + } + } + + uint dstAddressBase[4]; + { + + uint totalSumPacked[2]; + uint dstAddressPacked[2]; + + localPrefixScan128Dual( keySet[0], keySet[1], lIdx, dstAddressPacked[0], dstAddressPacked[1], totalSumPacked[0], totalSumPacked[1] ); + + dstAddressBase[0] = extractLower( dstAddressPacked[0] ); + dstAddressBase[1] = extractUpper( dstAddressPacked[0] ); + dstAddressBase[2] = extractLower( dstAddressPacked[1] ); + dstAddressBase[3] = extractUpper( dstAddressPacked[1] ); + + uint4 histogram; + histogram.x = extractLower(totalSumPacked[0]); + histogram.y = extractUpper(totalSumPacked[0]); + histogram.z = extractLower(totalSumPacked[1]); + histogram.w = extractUpper(totalSumPacked[1]); + + if( lIdx == targetKey + 0 ) myHistogram = histogram.x; + else if( lIdx == targetKey + 1 ) myHistogram = histogram.y; + else if( lIdx == targetKey + 2 ) myHistogram = histogram.z; + else if( lIdx == targetKey + 3 ) myHistogram = histogram.w; + + uint histogramSum = prefixScanVectorEx( histogram ); + + if( lIdx == targetKey + 0 ) localPrefixSum[targetKey+0] = localOffset+histogram.x; + else if( lIdx == targetKey + 1 ) localPrefixSum[targetKey+1] = localOffset+histogram.y; + else if( lIdx == targetKey + 2 ) localPrefixSum[targetKey+2] = localOffset+histogram.z; + else if( lIdx == targetKey + 3 ) localPrefixSum[targetKey+3] = localOffset+histogram.w; + + localOffset += histogramSum; + } + + GROUP_LDS_BARRIER; + + + for(int ie=0; ie<4; ie++) + { + uint4 scannedKey = key[ie]; + prefixScanVectorEx( scannedKey ); + + uint offset = localPrefixSum[targetKey + ie] + dstAddressBase[ie]; + uint4 dstAddress = uint4( offset, offset, offset, offset ) + scannedKey; + + newOffset[0] += dstAddress.x*key[ie].x; + newOffset[1] += 
dstAddress.y*key[ie].y; + newOffset[2] += dstAddress.z*key[ie].z; + newOffset[3] += dstAddress.w*key[ie].w; + } + } + + { // local scatter + SET_LOCAL_SORT_DATA(newOffset[0], myData[0]); + SET_LOCAL_SORT_DATA(newOffset[1], myData[1]); + SET_LOCAL_SORT_DATA(newOffset[2], myData[2]); + SET_LOCAL_SORT_DATA(newOffset[3], myData[3]); + } + + GROUP_LDS_BARRIER; + + { // write data + for(int i=0; i> m_startBit) & 0xf; + int groupOffset = localHistogramToCarry[binIdx]; + int myIdx = dataIdx - localPrefixSum[binIdx]; + + dataToSortOut[ groupOffset + myIdx ] = localData; + } + } + + GROUP_LDS_BARRIER; + if( lIdx < NUM_BUCKET ) + { + localHistogramToCarry[lIdx] += myHistogram; + } + GROUP_LDS_BARRIER; + } +} + + +[numthreads(WG_SIZE, 1, 1)] +void SortAndScatterKernel1( DEFAULT_ARGS ) +{ + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + + if( lIdx < (NUM_BUCKET) ) + { + localHistogramToCarry[lIdx] = rHistogram[lIdx*m_nWorkGroupsToExecute + wgIdx.x]; + } + + GROUP_LDS_BARRIER; + + for(uint igroup=wgIdx.x*m_nBlocksPerGroup; igroup>ib) & 0x1, ~(myData[1].key>>ib) & 0x1, ~(myData[2].key>>ib) & 0x1, ~(myData[3].key>>ib) & 0x1); + uint total; + uint4 rankOfP = localPrefixSum128V( keys, lIdx, total ); + uint4 rankOfN = uint4(startAddrBlock, startAddrBlock+1, startAddrBlock+2, startAddrBlock+3) - rankOfP + uint4( total, total, total, total ); + + uint4 myAddr = (keys==uint4(1,1,1,1))? 
rankOfP: rankOfN; + + GROUP_LDS_BARRIER; + + SET_LOCAL_SORT_DATA( myAddr.x, myData[0] ); + SET_LOCAL_SORT_DATA( myAddr.y, myData[1] ); + SET_LOCAL_SORT_DATA( myAddr.z, myData[2] ); + SET_LOCAL_SORT_DATA( myAddr.w, myData[3] ); + + GROUP_LDS_BARRIER; + + GET_LOCAL_SORT_DATA( startAddrBlock+0, myData[0] ); + GET_LOCAL_SORT_DATA( startAddrBlock+1, myData[1] ); + GET_LOCAL_SORT_DATA( startAddrBlock+2, myData[2] ); + GET_LOCAL_SORT_DATA( startAddrBlock+3, myData[3] ); + } + + {// create histogram -> prefix sum + if( lIdx < NUM_BUCKET ) + { + localHistogram[lIdx] = 0; + localHistogram[NUM_BUCKET+lIdx] = 0; + } + GROUP_LDS_BARRIER; + uint4 keys = uint4((myData[0].key>>m_startBit) & 0xf, (myData[1].key>>m_startBit) & 0xf, (myData[2].key>>m_startBit) & 0xf, (myData[3].key>>m_startBit) & 0xf); + + InterlockedAdd( localHistogram[NUM_BUCKET+keys.x], 1 ); + InterlockedAdd( localHistogram[NUM_BUCKET+keys.y], 1 ); + InterlockedAdd( localHistogram[NUM_BUCKET+keys.z], 1 ); + InterlockedAdd( localHistogram[NUM_BUCKET+keys.w], 1 ); + + GROUP_LDS_BARRIER; + + uint hIdx = NUM_BUCKET+lIdx; + if( lIdx < NUM_BUCKET ) + { + myHistogram = localHistogram[hIdx]; + } + GROUP_LDS_BARRIER; + + if( lIdx < NUM_BUCKET ) + { + localHistogram[hIdx] = localHistogram[hIdx-1]; + + localHistogram[hIdx] += localHistogram[hIdx-1]; + localHistogram[hIdx] += localHistogram[hIdx-2]; + localHistogram[hIdx] += localHistogram[hIdx-4]; + localHistogram[hIdx] += localHistogram[hIdx-8]; + } + + GROUP_LDS_BARRIER; + } +/* + {// write back + int numLocalElements = WG_SIZE*ELEMENTS_PER_WORK_ITEM; + startAddrBlock = lIdx*4; + uint startAddress = igroup*numLocalElements + startAddrBlock; + + for(int ie=0; ie>m_startBit)&0xf; + int groupOffset = localHistogramToCarry[binIdx]; + int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx]; + dataToSortOut[ groupOffset + myIdx ] = myData[ie]; + } + } + + GROUP_LDS_BARRIER; + if( lIdx < NUM_BUCKET ) + { + localHistogramToCarry[lIdx] += myHistogram; + } + GROUP_LDS_BARRIER; + + } 
+} + +/* +[numthreads(WG_SIZE, 1, 1)] +void SortAndScatterKernel1( uint3 gIdx : SV_GroupID, uint3 lIdx : SV_GroupThreadID ) +{ + if( lIdx.x < (NUM_BUCKET) ) + { + localHistogramToCarry[lIdx.x] = rHistogram[lIdx.x*m_nWorkGroupsToExecute + gIdx.x]; + } + + GROUP_LDS_BARRIER; + + for(uint igroup=gIdx.x*m_nBlocksPerGroup; igroup>ib) & 0x1, ~(myData[1].key>>ib) & 0x1, ~(myData[2].key>>ib) & 0x1, ~(myData[3].key>>ib) & 0x1); + uint total; + uint4 rankOfP = localPrefixSum128V( keys, lIdx.x, total ); + uint4 rankOfN = uint4(startAddrBlock, startAddrBlock+1, startAddrBlock+2, startAddrBlock+3) - rankOfP + uint4( total, total, total, total ); + + uint4 myAddr = (keys==uint4(1,1,1,1))? rankOfP: rankOfN; + + GROUP_LDS_BARRIER; + + SET_LOCAL_SORT_DATA( myAddr.x, myData[0] ); + SET_LOCAL_SORT_DATA( myAddr.y, myData[1] ); + SET_LOCAL_SORT_DATA( myAddr.z, myData[2] ); + SET_LOCAL_SORT_DATA( myAddr.w, myData[3] ); + + GROUP_LDS_BARRIER; + + GET_LOCAL_SORT_DATA( startAddrBlock+0, myData[0] ); + GET_LOCAL_SORT_DATA( startAddrBlock+1, myData[1] ); + GET_LOCAL_SORT_DATA( startAddrBlock+2, myData[2] ); + GET_LOCAL_SORT_DATA( startAddrBlock+3, myData[3] ); + } + + {// create histogram -> prefix sum + if( lIdx.x < NUM_BUCKET ) + { + localHistogram[lIdx.x] = 0; + localHistogram[NUM_BUCKET+lIdx.x] = 0; + } + GROUP_LDS_BARRIER; + uint4 keys = uint4((myData[0].key>>m_startBit) & 0xf, (myData[1].key>>m_startBit) & 0xf, (myData[2].key>>m_startBit) & 0xf, (myData[3].key>>m_startBit) & 0xf); + + InterlockedAdd( localHistogram[NUM_BUCKET+keys.x], 1 ); + InterlockedAdd( localHistogram[NUM_BUCKET+keys.y], 1 ); + InterlockedAdd( localHistogram[NUM_BUCKET+keys.z], 1 ); + InterlockedAdd( localHistogram[NUM_BUCKET+keys.w], 1 ); + + GROUP_LDS_BARRIER; + + uint hIdx = NUM_BUCKET+lIdx.x; + if( lIdx.x < NUM_BUCKET ) + { + myHistogram = localHistogram[hIdx]; + } + GROUP_LDS_BARRIER; + + + if( lIdx.x < NUM_BUCKET ) + { + localHistogram[hIdx] = localHistogram[hIdx-1]; + + localHistogram[hIdx] += 
localHistogram[hIdx-1]; + localHistogram[hIdx] += localHistogram[hIdx-2]; + localHistogram[hIdx] += localHistogram[hIdx-4]; + localHistogram[hIdx] += localHistogram[hIdx-8]; + } + + GROUP_LDS_BARRIER; + } + {// write back + for(int ie=0; ie>m_startBit)&0xf; + int groupOffset = localHistogramToCarry[binIdx]; + int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx]; + + dataToSortOut[ groupOffset + myIdx ] = myData[ie]; + } + } + + GROUP_LDS_BARRIER; + if( lIdx.x < NUM_BUCKET ) + { + localHistogramToCarry[lIdx.x] += myHistogram; + } + GROUP_LDS_BARRIER; + + } +} +*/ + +StructuredBuffer dataToSort1 : register( t0 ); +RWStructuredBuffer wHistogram1 : register(u0); + +#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx.x] + +[numthreads(WG_SIZE, 1, 1)] +void StreamCountKernel( DEFAULT_ARGS ) +{ + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + + int myHistogram[NUM_BUCKET]; + + for(int i=0; i> m_startBit) & 0xf; + localKeys[1] = (localData1.key >> m_startBit) & 0xf; + localKeys[2] = (localData2.key >> m_startBit) & 0xf; + localKeys[3] = (localData3.key >> m_startBit) & 0xf; + } + + MY_HISTOGRAM( localKeys[0] )++; + MY_HISTOGRAM( localKeys[1] )++; + MY_HISTOGRAM( localKeys[2] )++; + MY_HISTOGRAM( localKeys[3] )++; + } + + GROUP_LDS_BARRIER; + + { // reduce to 1 + if( lIdx < 64 )//WG_SIZE/2 ) + { + for(int i=0; i> m_startBit) & 0xf; + localKeys[1] = (localData1.key >> m_startBit) & 0xf; + localKeys[2] = (localData2.key >> m_startBit) & 0xf; + localKeys[3] = (localData3.key >> m_startBit) & 0xf; + } + + myHistogram[ localKeys[0] ]++; + myHistogram[ localKeys[1] ]++; + myHistogram[ localKeys[2] ]++; + myHistogram[ localKeys[3] ]++; + } + + { // move to shared + for(int i=0; i 80*16 ) +[numthreads(WG_SIZE, 1, 1)] +void PrefixScanKernel( DEFAULT_ARGS ) +{ + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + + uint data[12] = {0,0,0,0,0,0,0,0,0,0,0,0}; + for(int i=0; i<12; i++) + { + if( int(12*lIdx+i) < NUM_BUCKET*m_nWorkGroupsToExecute ) + data[i] 
= wHistogram1[12*lIdx+i]; + } + + uint4 myData = uint4(0,0,0,0); + myData.x = data[0] + data[1]; + myData.y = data[2] + data[3]; + myData.z = data[4] + data[5]; + myData.w = data[6] + data[7]; + + + uint totalSum; + uint4 scanned = localPrefixSum128V( myData, lIdx, totalSum ); + + data[11] = scanned.w + data[9] + data[10]; + data[10] = scanned.w + data[9]; + data[9] = scanned.w; + data[8] = scanned.z + data[6] + data[7]; + data[7] = scanned.z + data[6]; + data[6] = scanned.z; + data[5] = scanned.y + data[3] + data[4]; + data[4] = scanned.y + data[3]; + data[3] = scanned.y; + data[2] = scanned.x + data[0] + data[1]; + data[1] = scanned.x + data[0]; + data[0] = scanned.x; + + for(int i=0; i<12; i++) + { + wHistogram1[12*lIdx+i] = data[i]; + } +} +/* +[numthreads(WG_SIZE, 1, 1)] +void PrefixScanKernel( DEFAULT_ARGS ) +{ + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + + uint data[8] = {0,0,0,0,0,0,0,0}; + for(int i=0; i<8; i++) + { + if( int(8*lIdx+i) < NUM_BUCKET*m_nWorkGroupsToExecute ) + data[i] = wHistogram1[8*lIdx+i]; + } + + uint4 myData = uint4(0,0,0,0); + myData.x = data[0] + data[1]; + myData.y = data[2] + data[3]; + myData.z = data[4] + data[5]; + myData.w = data[6] + data[7]; + + + uint totalSum; + uint4 scanned = localPrefixSum128V( myData, lIdx, totalSum ); + + data[7] = scanned.w + data[6]; + data[6] = scanned.w;// + data[5]; + data[5] = scanned.z + data[4]; + data[4] = scanned.z;// + data[3]; + data[3] = scanned.y + data[2]; + data[2] = scanned.y;// + data[1]; + data[1] = scanned.x + data[0]; + data[0] = scanned.x; + + for(int i=0; i<8; i++) + { + wHistogram1[8*lIdx+i] = data[i]; + } +} +*/ + + +[numthreads(WG_SIZE, 1, 1)] +void CopyKernel( DEFAULT_ARGS ) +{ + u32 lIdx = GET_LOCAL_IDX; + u32 wgIdx = GET_GROUP_IDX; + + for(uint igroup=wgIdx.x*m_nBlocksPerGroup; igroup rHistogram : register(t0);\n" +"\n" +"RWStructuredBuffer dataToSort : register( u0 );\n" +"RWStructuredBuffer dataToSortOut : register( u1 );\n" +"\n" +"\n" +"\n" +"#define 
WG_SIZE 128\n" +"#define ELEMENTS_PER_WORK_ITEM 4\n" +"#define BITS_PER_PASS 4\n" +"#define NUM_BUCKET (1<> targetKey;\n" +" key.y = (data.y & mask) >> targetKey;\n" +" key.z = (data.z & mask) >> targetKey;\n" +" key.w = (data.w & mask) >> targetKey;\n" +" return key;\n" +"}\n" +"\n" +"uint packKeys(uint lower, uint upper)\n" +"{\n" +" return lower|(upper<<16);\n" +"}\n" +"\n" +"uint4 packKeys(uint4 lower, uint4 upper)\n" +"{\n" +" return uint4( lower.x|(upper.x<<16), lower.y|(upper.y<<16), lower.z|(upper.z<<16), lower.w|(upper.w<<16) );\n" +"}\n" +"\n" +"uint extractLower( uint data )\n" +"{\n" +" return data&0xffff;\n" +"}\n" +"\n" +"uint extractUpper( uint data )\n" +"{\n" +" return (data>>16)&0xffff;\n" +"}\n" +"\n" +"uint4 extractLower( uint4 data )\n" +"{\n" +" return uint4( data.x&0xffff, data.y&0xffff, data.z&0xffff, data.w&0xffff );\n" +"}\n" +"\n" +"uint4 extractUpper( uint4 data )\n" +"{\n" +" return uint4( (data.x>>16)&0xffff, (data.y>>16)&0xffff, (data.z>>16)&0xffff, (data.w>>16)&0xffff );\n" +"}\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void SortAndScatterKernel( DEFAULT_ARGS ) \n" +"{\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +"\n" +" if( lIdx < (NUM_BUCKET) )\n" +" {\n" +" localHistogramToCarry[lIdx] = rHistogram[lIdx*m_nWorkGroupsToExecute + wgIdx];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" for(uint igroup=wgIdx*m_nBlocksPerGroup; igroup>m_startBit) & 0xf, (myData[1].key>>m_startBit) & 0xf, (myData[2].key>>m_startBit) & 0xf, (myData[3].key>>m_startBit) & 0xf);\n" +" for(uint targetKey=0; targetKey<(NUM_BUCKET); targetKey+=4)\n" +" {\n" +" uint4 key[4];\n" +" uint keySet[2];\n" +" { // pack 4\n" +" uint4 scannedKey[4];\n" +" key[0] = scannedKey[0] = extractKeys( b, targetKey+0 );\n" +" key[1] = scannedKey[1] = extractKeys( b, targetKey+1 );\n" +" key[2] = scannedKey[2] = extractKeys( b, targetKey+2 );\n" +" key[3] = scannedKey[3] = extractKeys( b, targetKey+3 );\n" +" {\n" +" uint s[4];\n" +" s[0] = 
prefixScanVectorEx( scannedKey[0] );\n" +" s[1] = prefixScanVectorEx( scannedKey[1] );\n" +" s[2] = prefixScanVectorEx( scannedKey[2] );\n" +" s[3] = prefixScanVectorEx( scannedKey[3] );\n" +" keySet[0] = packKeys( s[0], s[1] );\n" +" keySet[1] = packKeys( s[2], s[3] );\n" +" }\n" +" }\n" +"\n" +" uint dstAddressBase[4];\n" +" {\n" +"\n" +" uint totalSumPacked[2];\n" +" uint dstAddressPacked[2];\n" +"\n" +" localPrefixScan128Dual( keySet[0], keySet[1], lIdx, dstAddressPacked[0], dstAddressPacked[1], totalSumPacked[0], totalSumPacked[1] );\n" +"\n" +" dstAddressBase[0] = extractLower( dstAddressPacked[0] );\n" +" dstAddressBase[1] = extractUpper( dstAddressPacked[0] );\n" +" dstAddressBase[2] = extractLower( dstAddressPacked[1] );\n" +" dstAddressBase[3] = extractUpper( dstAddressPacked[1] );\n" +"\n" +" uint4 histogram;\n" +" histogram.x = extractLower(totalSumPacked[0]);\n" +" histogram.y = extractUpper(totalSumPacked[0]);\n" +" histogram.z = extractLower(totalSumPacked[1]);\n" +" histogram.w = extractUpper(totalSumPacked[1]);\n" +"\n" +" if( lIdx == targetKey + 0 ) myHistogram = histogram.x;\n" +" else if( lIdx == targetKey + 1 ) myHistogram = histogram.y;\n" +" else if( lIdx == targetKey + 2 ) myHistogram = histogram.z;\n" +" else if( lIdx == targetKey + 3 ) myHistogram = histogram.w;\n" +" \n" +" uint histogramSum = prefixScanVectorEx( histogram );\n" +"\n" +" if( lIdx == targetKey + 0 ) localPrefixSum[targetKey+0] = localOffset+histogram.x;\n" +" else if( lIdx == targetKey + 1 ) localPrefixSum[targetKey+1] = localOffset+histogram.y;\n" +" else if( lIdx == targetKey + 2 ) localPrefixSum[targetKey+2] = localOffset+histogram.z;\n" +" else if( lIdx == targetKey + 3 ) localPrefixSum[targetKey+3] = localOffset+histogram.w;\n" +"\n" +" localOffset += histogramSum;\n" +" }\n" +" \n" +" GROUP_LDS_BARRIER;\n" +"\n" +"\n" +" for(int ie=0; ie<4; ie++)\n" +" {\n" +" uint4 scannedKey = key[ie];\n" +" prefixScanVectorEx( scannedKey );\n" +"\n" +" uint offset = 
localPrefixSum[targetKey + ie] + dstAddressBase[ie];\n" +" uint4 dstAddress = uint4( offset, offset, offset, offset ) + scannedKey;\n" +"\n" +" newOffset[0] += dstAddress.x*key[ie].x;\n" +" newOffset[1] += dstAddress.y*key[ie].y;\n" +" newOffset[2] += dstAddress.z*key[ie].z;\n" +" newOffset[3] += dstAddress.w*key[ie].w;\n" +" }\n" +" }\n" +"\n" +" { // local scatter\n" +" SET_LOCAL_SORT_DATA(newOffset[0], myData[0]);\n" +" SET_LOCAL_SORT_DATA(newOffset[1], myData[1]);\n" +" SET_LOCAL_SORT_DATA(newOffset[2], myData[2]);\n" +" SET_LOCAL_SORT_DATA(newOffset[3], myData[3]);\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" { // write data\n" +" for(int i=0; i> m_startBit) & 0xf;\n" +" int groupOffset = localHistogramToCarry[binIdx];\n" +" int myIdx = dataIdx - localPrefixSum[binIdx];\n" +"\n" +" dataToSortOut[ groupOffset + myIdx ] = localData;\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogramToCarry[lIdx] += myHistogram;\n" +" }\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +"}\n" +"\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void SortAndScatterKernel1( DEFAULT_ARGS )\n" +"{\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +"\n" +" if( lIdx < (NUM_BUCKET) )\n" +" {\n" +" localHistogramToCarry[lIdx] = rHistogram[lIdx*m_nWorkGroupsToExecute + wgIdx.x];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" for(uint igroup=wgIdx.x*m_nBlocksPerGroup; igroup>ib) & 0x1, ~(myData[1].key>>ib) & 0x1, ~(myData[2].key>>ib) & 0x1, ~(myData[3].key>>ib) & 0x1);\n" +" uint total;\n" +" uint4 rankOfP = localPrefixSum128V( keys, lIdx, total );\n" +" uint4 rankOfN = uint4(startAddrBlock, startAddrBlock+1, startAddrBlock+2, startAddrBlock+3) - rankOfP + uint4( total, total, total, total );\n" +"\n" +" uint4 myAddr = (keys==uint4(1,1,1,1))? 
rankOfP: rankOfN;\n" +" \n" +" GROUP_LDS_BARRIER;\n" +"\n" +" SET_LOCAL_SORT_DATA( myAddr.x, myData[0] );\n" +" SET_LOCAL_SORT_DATA( myAddr.y, myData[1] );\n" +" SET_LOCAL_SORT_DATA( myAddr.z, myData[2] );\n" +" SET_LOCAL_SORT_DATA( myAddr.w, myData[3] );\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" GET_LOCAL_SORT_DATA( startAddrBlock+0, myData[0] );\n" +" GET_LOCAL_SORT_DATA( startAddrBlock+1, myData[1] );\n" +" GET_LOCAL_SORT_DATA( startAddrBlock+2, myData[2] );\n" +" GET_LOCAL_SORT_DATA( startAddrBlock+3, myData[3] );\n" +" }\n" +"\n" +" {// create histogram -> prefix sum\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogram[lIdx] = 0;\n" +" localHistogram[NUM_BUCKET+lIdx] = 0;\n" +" }\n" +" GROUP_LDS_BARRIER;\n" +" uint4 keys = uint4((myData[0].key>>m_startBit) & 0xf, (myData[1].key>>m_startBit) & 0xf, (myData[2].key>>m_startBit) & 0xf, (myData[3].key>>m_startBit) & 0xf);\n" +" \n" +" InterlockedAdd( localHistogram[NUM_BUCKET+keys.x], 1 );\n" +" InterlockedAdd( localHistogram[NUM_BUCKET+keys.y], 1 );\n" +" InterlockedAdd( localHistogram[NUM_BUCKET+keys.z], 1 );\n" +" InterlockedAdd( localHistogram[NUM_BUCKET+keys.w], 1 );\n" +" \n" +" GROUP_LDS_BARRIER;\n" +" \n" +" uint hIdx = NUM_BUCKET+lIdx;\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" myHistogram = localHistogram[hIdx];\n" +" }\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogram[hIdx] = localHistogram[hIdx-1];\n" +"\n" +" localHistogram[hIdx] += localHistogram[hIdx-1];\n" +" localHistogram[hIdx] += localHistogram[hIdx-2];\n" +" localHistogram[hIdx] += localHistogram[hIdx-4];\n" +" localHistogram[hIdx] += localHistogram[hIdx-8];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +"/*\n" +" {// write back\n" +" int numLocalElements = WG_SIZE*ELEMENTS_PER_WORK_ITEM;\n" +" startAddrBlock = lIdx*4;\n" +" uint startAddress = igroup*numLocalElements + startAddrBlock;\n" +"\n" +" for(int ie=0; ie>m_startBit)&0xf;\n" +" int groupOffset = localHistogramToCarry[binIdx];\n" +" 
int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx];\n" +" dataToSortOut[ groupOffset + myIdx ] = myData[ie];\n" +" }\n" +" }\n" +" \n" +" GROUP_LDS_BARRIER;\n" +" if( lIdx < NUM_BUCKET )\n" +" {\n" +" localHistogramToCarry[lIdx] += myHistogram;\n" +" }\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" }\n" +"}\n" +"\n" +"/*\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void SortAndScatterKernel1( uint3 gIdx : SV_GroupID, uint3 lIdx : SV_GroupThreadID )\n" +"{\n" +" if( lIdx.x < (NUM_BUCKET) )\n" +" {\n" +" localHistogramToCarry[lIdx.x] = rHistogram[lIdx.x*m_nWorkGroupsToExecute + gIdx.x];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" for(uint igroup=gIdx.x*m_nBlocksPerGroup; igroup>ib) & 0x1, ~(myData[1].key>>ib) & 0x1, ~(myData[2].key>>ib) & 0x1, ~(myData[3].key>>ib) & 0x1);\n" +" uint total;\n" +" uint4 rankOfP = localPrefixSum128V( keys, lIdx.x, total );\n" +" uint4 rankOfN = uint4(startAddrBlock, startAddrBlock+1, startAddrBlock+2, startAddrBlock+3) - rankOfP + uint4( total, total, total, total );\n" +"\n" +" uint4 myAddr = (keys==uint4(1,1,1,1))? 
rankOfP: rankOfN;\n" +" \n" +" GROUP_LDS_BARRIER;\n" +"\n" +" SET_LOCAL_SORT_DATA( myAddr.x, myData[0] );\n" +" SET_LOCAL_SORT_DATA( myAddr.y, myData[1] );\n" +" SET_LOCAL_SORT_DATA( myAddr.z, myData[2] );\n" +" SET_LOCAL_SORT_DATA( myAddr.w, myData[3] );\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" GET_LOCAL_SORT_DATA( startAddrBlock+0, myData[0] );\n" +" GET_LOCAL_SORT_DATA( startAddrBlock+1, myData[1] );\n" +" GET_LOCAL_SORT_DATA( startAddrBlock+2, myData[2] );\n" +" GET_LOCAL_SORT_DATA( startAddrBlock+3, myData[3] );\n" +" }\n" +" \n" +" {// create histogram -> prefix sum\n" +" if( lIdx.x < NUM_BUCKET )\n" +" {\n" +" localHistogram[lIdx.x] = 0;\n" +" localHistogram[NUM_BUCKET+lIdx.x] = 0;\n" +" }\n" +" GROUP_LDS_BARRIER;\n" +" uint4 keys = uint4((myData[0].key>>m_startBit) & 0xf, (myData[1].key>>m_startBit) & 0xf, (myData[2].key>>m_startBit) & 0xf, (myData[3].key>>m_startBit) & 0xf);\n" +" \n" +" InterlockedAdd( localHistogram[NUM_BUCKET+keys.x], 1 );\n" +" InterlockedAdd( localHistogram[NUM_BUCKET+keys.y], 1 );\n" +" InterlockedAdd( localHistogram[NUM_BUCKET+keys.z], 1 );\n" +" InterlockedAdd( localHistogram[NUM_BUCKET+keys.w], 1 );\n" +" \n" +" GROUP_LDS_BARRIER;\n" +" \n" +" uint hIdx = NUM_BUCKET+lIdx.x;\n" +" if( lIdx.x < NUM_BUCKET )\n" +" {\n" +" myHistogram = localHistogram[hIdx];\n" +" }\n" +" GROUP_LDS_BARRIER;\n" +" \n" +"\n" +" if( lIdx.x < NUM_BUCKET )\n" +" {\n" +" localHistogram[hIdx] = localHistogram[hIdx-1];\n" +"\n" +" localHistogram[hIdx] += localHistogram[hIdx-1];\n" +" localHistogram[hIdx] += localHistogram[hIdx-2];\n" +" localHistogram[hIdx] += localHistogram[hIdx-4];\n" +" localHistogram[hIdx] += localHistogram[hIdx-8];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" }\n" +" {// write back\n" +" for(int ie=0; ie>m_startBit)&0xf;\n" +" int groupOffset = localHistogramToCarry[binIdx];\n" +" int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx];\n" +" \n" +" dataToSortOut[ groupOffset + myIdx ] = myData[ie];\n" +" }\n" +" }\n" +" \n" +" 
GROUP_LDS_BARRIER;\n" +" if( lIdx.x < NUM_BUCKET )\n" +" {\n" +" localHistogramToCarry[lIdx.x] += myHistogram;\n" +" }\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" }\n" +"}\n" +"*/\n" +"\n" +"StructuredBuffer dataToSort1 : register( t0 );\n" +"RWStructuredBuffer wHistogram1 : register(u0);\n" +"\n" +"#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx.x]\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void StreamCountKernel( DEFAULT_ARGS ) \n" +"{\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +"\n" +" int myHistogram[NUM_BUCKET];\n" +"\n" +" for(int i=0; i> m_startBit) & 0xf;\n" +" localKeys[1] = (localData1.key >> m_startBit) & 0xf;\n" +" localKeys[2] = (localData2.key >> m_startBit) & 0xf;\n" +" localKeys[3] = (localData3.key >> m_startBit) & 0xf;\n" +" }\n" +"\n" +" MY_HISTOGRAM( localKeys[0] )++;\n" +" MY_HISTOGRAM( localKeys[1] )++;\n" +" MY_HISTOGRAM( localKeys[2] )++;\n" +" MY_HISTOGRAM( localKeys[3] )++;\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" { // reduce to 1\n" +" if( lIdx < 64 )//WG_SIZE/2 )\n" +" {\n" +" for(int i=0; i> m_startBit) & 0xf;\n" +" localKeys[1] = (localData1.key >> m_startBit) & 0xf;\n" +" localKeys[2] = (localData2.key >> m_startBit) & 0xf;\n" +" localKeys[3] = (localData3.key >> m_startBit) & 0xf;\n" +" }\n" +"\n" +" myHistogram[ localKeys[0] ]++;\n" +" myHistogram[ localKeys[1] ]++;\n" +" myHistogram[ localKeys[2] ]++;\n" +" myHistogram[ localKeys[3] ]++;\n" +" }\n" +"\n" +" { // move to shared\n" +" for(int i=0; i 80*16 )\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void PrefixScanKernel( DEFAULT_ARGS )\n" +"{\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +"\n" +" uint data[12] = {0,0,0,0,0,0,0,0,0,0,0,0};\n" +" for(int i=0; i<12; i++)\n" +" {\n" +" if( int(12*lIdx+i) < NUM_BUCKET*m_nWorkGroupsToExecute )\n" +" data[i] = wHistogram1[12*lIdx+i];\n" +" }\n" +"\n" +" uint4 myData = uint4(0,0,0,0);\n" +" myData.x = data[0] + data[1];\n" +" myData.y = data[2] + data[3];\n" +" myData.z = data[4] 
+ data[5];\n" +" myData.w = data[6] + data[7];\n" +"\n" +"\n" +" uint totalSum;\n" +" uint4 scanned = localPrefixSum128V( myData, lIdx, totalSum );\n" +"\n" +" data[11] = scanned.w + data[9] + data[10];\n" +" data[10] = scanned.w + data[9];\n" +" data[9] = scanned.w;\n" +" data[8] = scanned.z + data[6] + data[7];\n" +" data[7] = scanned.z + data[6];\n" +" data[6] = scanned.z;\n" +" data[5] = scanned.y + data[3] + data[4];\n" +" data[4] = scanned.y + data[3];\n" +" data[3] = scanned.y;\n" +" data[2] = scanned.x + data[0] + data[1];\n" +" data[1] = scanned.x + data[0];\n" +" data[0] = scanned.x;\n" +"\n" +" for(int i=0; i<12; i++)\n" +" {\n" +" wHistogram1[12*lIdx+i] = data[i];\n" +" }\n" +"}\n" +"/*\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void PrefixScanKernel( DEFAULT_ARGS )\n" +"{\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +"\n" +" uint data[8] = {0,0,0,0,0,0,0,0};\n" +" for(int i=0; i<8; i++)\n" +" {\n" +" if( int(8*lIdx+i) < NUM_BUCKET*m_nWorkGroupsToExecute )\n" +" data[i] = wHistogram1[8*lIdx+i];\n" +" }\n" +"\n" +" uint4 myData = uint4(0,0,0,0);\n" +" myData.x = data[0] + data[1];\n" +" myData.y = data[2] + data[3];\n" +" myData.z = data[4] + data[5];\n" +" myData.w = data[6] + data[7];\n" +"\n" +"\n" +" uint totalSum;\n" +" uint4 scanned = localPrefixSum128V( myData, lIdx, totalSum );\n" +"\n" +" data[7] = scanned.w + data[6];\n" +" data[6] = scanned.w;// + data[5];\n" +" data[5] = scanned.z + data[4];\n" +" data[4] = scanned.z;// + data[3];\n" +" data[3] = scanned.y + data[2];\n" +" data[2] = scanned.y;// + data[1];\n" +" data[1] = scanned.x + data[0];\n" +" data[0] = scanned.x;\n" +"\n" +" for(int i=0; i<8; i++)\n" +" {\n" +" wHistogram1[8*lIdx+i] = data[i];\n" +" }\n" +"}\n" +"*/\n" +"\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void CopyKernel( DEFAULT_ARGS )\n" +"{\n" +" u32 lIdx = GET_LOCAL_IDX;\n" +" u32 wgIdx = GET_GROUP_IDX;\n" +"\n" +" for(uint igroup=wgIdx.x*m_nBlocksPerGroup; igroup +class RadixSort : public RadixSortBase 
+{ + public: + struct Data + { + HostBuffer* m_workBuffer; + }; + + enum + { + BITS_PER_PASS = 8, + NUM_TABLES = (1<m_type == TYPE_HOST ); + + Data* data = new Data; + data->m_workBuffer = new HostBuffer( deviceData, maxSize ); + return data; + } + + static + void deallocate(Data* data) + { + delete data->m_workBuffer; + delete data; + } + + static + void execute(Data* data, Buffer& inout, int n, int sortBits = 32) + { + ADLASSERT( inout.getType() == TYPE_HOST ); + + int tables[NUM_TABLES]; + int counter[NUM_TABLES]; + + SortData* src = inout.m_ptr; + SortData* dst = data->m_workBuffer->m_ptr; + + int count=0; + for(int startBit=0; startBit> startBit) & (NUM_TABLES-1); + tables[tableIdx]++; + } + + // prefix scan + int sum = 0; + for(int i=0; i> startBit) & (NUM_TABLES-1); + + dst[tables[tableIdx] + counter[tableIdx]] = src[i]; + counter[tableIdx] ++; + } + + swap2( src, dst ); + count++; + } + + { + if (count&1) + //if( src != inout.m_ptr ) + { + memcpy( dst, src, sizeof(SortData)*n ); + } + } + } +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortSimpleCL.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortSimpleCL.h new file mode 100644 index 000000000..8325529ff --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortSimpleCL.h @@ -0,0 +1,134 @@ +static const char* radixSortSimpleKernelsCL = \ + "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" + "\n" + "typedef unsigned int u32;\n" + "#define GET_GROUP_IDX get_group_id(0)\n" + "#define GET_LOCAL_IDX get_local_id(0)\n" + "#define GET_GLOBAL_IDX get_global_id(0)\n" + "#define GET_GROUP_SIZE get_local_size(0)\n" + "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" + "#define AtomInc(x) atom_inc(&(x))\n" + "#define AtomInc1(x, out) out = atom_inc(&(x))\n" + "\n" + "\n" + "#define WG_SIZE 128\n" + "#define NUM_PER_WI 4\n" + "\n" + 
"\n" + "typedef struct\n" + "{\n" + " u32 m_key;\n" + " u32 m_value;\n" + "}SortData;\n" + "\n" + "\n" + "typedef struct\n" + "{\n" + " u32 m_startBit;\n" + " u32 m_numGroups;\n" + " u32 m_padding[2];\n" + "} ConstBuffer;\n" + "\n" + "\n" + "__kernel\n" + "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void LocalCountKernel(__global SortData* sortData,\n" + " __global u32* ldsHistogramOut,\n" + " ConstBuffer cb)\n" + "{\n" + " __local u32 ldsHistogram[16][256];\n" + "\n" + " int lIdx = GET_LOCAL_IDX;\n" + " int gIdx = GET_GLOBAL_IDX;\n" + "\n" + " for(int i=0; i<16; i++)\n" + " {\n" + " ldsHistogram[i][lIdx] = 0.f;\n" + " ldsHistogram[i][lIdx+128] = 0.f;\n" + " }\n" + "\n" + " GROUP_LDS_BARRIER;\n" + "\n" + " SortData datas[NUM_PER_WI];\n" + " datas[0] = sortData[gIdx*NUM_PER_WI+0];\n" + " datas[1] = sortData[gIdx*NUM_PER_WI+1];\n" + " datas[2] = sortData[gIdx*NUM_PER_WI+2];\n" + " datas[3] = sortData[gIdx*NUM_PER_WI+3];\n" + "\n" + " datas[0].m_key = (datas[0].m_key >> cb.m_startBit) & 0xff;\n" + " datas[1].m_key = (datas[1].m_key >> cb.m_startBit) & 0xff;\n" + " datas[2].m_key = (datas[2].m_key >> cb.m_startBit) & 0xff;\n" + " datas[3].m_key = (datas[3].m_key >> cb.m_startBit) & 0xff;\n" + "\n" + " int tableIdx = lIdx%16;\n" + "\n" + " AtomInc(ldsHistogram[tableIdx][datas[0].m_key]);\n" + " AtomInc(ldsHistogram[tableIdx][datas[1].m_key]);\n" + " AtomInc(ldsHistogram[tableIdx][datas[2].m_key]);\n" + " AtomInc(ldsHistogram[tableIdx][datas[3].m_key]);\n" + "\n" + " GROUP_LDS_BARRIER;\n" + "\n" + " u32 sum0, sum1;\n" + " sum0 = sum1 = 0;\n" + " for(int i=0; i<16; i++)\n" + " {\n" + " sum0 += ldsHistogram[i][lIdx];\n" + " sum1 += ldsHistogram[i][lIdx+128];\n" + " }\n" + "\n" + " ldsHistogramOut[lIdx*cb.m_numGroups+GET_GROUP_IDX] = sum0;\n" + " ldsHistogramOut[(lIdx+128)*cb.m_numGroups+GET_GROUP_IDX] = sum1;\n" + "}\n" + "\n" + "__kernel\n" + "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void ScatterKernel(__global SortData* sortData,\n" + " 
__global SortData* sortDataOut,\n" + " __global u32* scannedHistogram,\n" + " ConstBuffer cb)\n" + "{\n" + " __local u32 ldsCurrentLocation[256];\n" + "\n" + " int lIdx = GET_LOCAL_IDX;\n" + " int gIdx = GET_GLOBAL_IDX;\n" + "\n" + " {\n" + " ldsCurrentLocation[lIdx] = scannedHistogram[lIdx*cb.m_numGroups+GET_GROUP_IDX];\n" + " ldsCurrentLocation[lIdx+128] = scannedHistogram[(lIdx+128)*cb.m_numGroups+GET_GROUP_IDX];\n" + " }\n" + "\n" + " GROUP_LDS_BARRIER;\n" + "\n" + " SortData datas[NUM_PER_WI];\n" + " int keys[NUM_PER_WI];\n" + " datas[0] = sortData[gIdx*NUM_PER_WI+0];\n" + " datas[1] = sortData[gIdx*NUM_PER_WI+1];\n" + " datas[2] = sortData[gIdx*NUM_PER_WI+2];\n" + " datas[3] = sortData[gIdx*NUM_PER_WI+3];\n" + "\n" + " keys[0] = (datas[0].m_key >> cb.m_startBit) & 0xff;\n" + " keys[1] = (datas[1].m_key >> cb.m_startBit) & 0xff;\n" + " keys[2] = (datas[2].m_key >> cb.m_startBit) & 0xff;\n" + " keys[3] = (datas[3].m_key >> cb.m_startBit) & 0xff;\n" + "\n" + " int dst[NUM_PER_WI];\n" + " for(int i=0; i sortData : register( t0 );\n" + "RWStructuredBuffer ldsHistogramOut : register( u0 );\n" + "\n" + "groupshared u32 ldsHistogram[16][256];\n" + "\n" + "[numthreads(WG_SIZE, 1, 1)]\n" + "void LocalCountKernel( DEFAULT_ARGS )\n" + "{\n" + " int lIdx = GET_LOCAL_IDX;\n" + " int gIdx = GET_GLOBAL_IDX;\n" + "\n" + " for(int i=0; i<16; i++)\n" + " {\n" + " ldsHistogram[i][lIdx] = 0.f;\n" + " ldsHistogram[i][lIdx+128] = 0.f;\n" + " }\n" + "\n" + " GROUP_LDS_BARRIER;\n" + "\n" + " SortData datas[NUM_PER_WI];\n" + " datas[0] = sortData[gIdx*NUM_PER_WI+0];\n" + " datas[1] = sortData[gIdx*NUM_PER_WI+1];\n" + " datas[2] = sortData[gIdx*NUM_PER_WI+2];\n" + " datas[3] = sortData[gIdx*NUM_PER_WI+3];\n" + "\n" + " datas[0].m_key = (datas[0].m_key >> m_startBit) & 0xff;\n" + " datas[1].m_key = (datas[1].m_key >> m_startBit) & 0xff;\n" + " datas[2].m_key = (datas[2].m_key >> m_startBit) & 0xff;\n" + " datas[3].m_key = (datas[3].m_key >> m_startBit) & 0xff;\n" + "\n" + " int tableIdx 
= lIdx%16;\n" + "\n" + " AtomInc(ldsHistogram[tableIdx][datas[0].m_key]);\n" + " AtomInc(ldsHistogram[tableIdx][datas[1].m_key]);\n" + " AtomInc(ldsHistogram[tableIdx][datas[2].m_key]);\n" + " AtomInc(ldsHistogram[tableIdx][datas[3].m_key]);\n" + "\n" + " GROUP_LDS_BARRIER;\n" + "\n" + " u32 sum0, sum1;\n" + " sum0 = sum1 = 0;\n" + " for(int i=0; i<16; i++)\n" + " {\n" + " sum0 += ldsHistogram[i][lIdx];\n" + " sum1 += ldsHistogram[i][lIdx+128];\n" + " }\n" + "\n" + " ldsHistogramOut[lIdx*m_numGroups+GET_GROUP_IDX] = sum0;\n" + " ldsHistogramOut[(lIdx+128)*m_numGroups+GET_GROUP_IDX] = sum1;\n" + "}\n" + "\n" + "\n" + "RWStructuredBuffer sortDataOut : register( u0 );\n" + "RWStructuredBuffer scannedHistogram : register( u1 );\n" + "\n" + "groupshared u32 ldsCurrentLocation[256];\n" + "\n" + "[numthreads(WG_SIZE, 1, 1)]\n" + "void ScatterKernel( DEFAULT_ARGS )\n" + "{\n" + " int lIdx = GET_LOCAL_IDX;\n" + " int gIdx = GET_GLOBAL_IDX;\n" + "\n" + " {\n" + " ldsCurrentLocation[lIdx] = scannedHistogram[lIdx*m_numGroups+GET_GROUP_IDX];\n" + " ldsCurrentLocation[lIdx+128] = scannedHistogram[(lIdx+128)*m_numGroups+GET_GROUP_IDX];\n" + " }\n" + "\n" + " GROUP_LDS_BARRIER;\n" + "\n" + " SortData datas[NUM_PER_WI];\n" + " int keys[NUM_PER_WI];\n" + " datas[0] = sortData[gIdx*NUM_PER_WI+0];\n" + " datas[1] = sortData[gIdx*NUM_PER_WI+1];\n" + " datas[2] = sortData[gIdx*NUM_PER_WI+2];\n" + " datas[3] = sortData[gIdx*NUM_PER_WI+3];\n" + "\n" + " keys[0] = (datas[0].m_key >> m_startBit) & 0xff;\n" + " keys[1] = (datas[1].m_key >> m_startBit) & 0xff;\n" + " keys[2] = (datas[2].m_key >> m_startBit) & 0xff;\n" + " keys[3] = (datas[3].m_key >> m_startBit) & 0xff;\n" + "\n" + " int dst[NUM_PER_WI];\n" + " for(int i=0; i> cb.m_startBit) & 0xff; + datas[1].m_key = (datas[1].m_key >> cb.m_startBit) & 0xff; + datas[2].m_key = (datas[2].m_key >> cb.m_startBit) & 0xff; + datas[3].m_key = (datas[3].m_key >> cb.m_startBit) & 0xff; + + int tableIdx = lIdx%16; + + 
AtomInc(ldsHistogram[tableIdx][datas[0].m_key]); + AtomInc(ldsHistogram[tableIdx][datas[1].m_key]); + AtomInc(ldsHistogram[tableIdx][datas[2].m_key]); + AtomInc(ldsHistogram[tableIdx][datas[3].m_key]); + + GROUP_LDS_BARRIER; + + u32 sum0, sum1; + sum0 = sum1 = 0; + for(int i=0; i<16; i++) + { + sum0 += ldsHistogram[i][lIdx]; + sum1 += ldsHistogram[i][lIdx+128]; + } + + ldsHistogramOut[lIdx*cb.m_numGroups+GET_GROUP_IDX] = sum0; + ldsHistogramOut[(lIdx+128)*cb.m_numGroups+GET_GROUP_IDX] = sum1; +} + +__kernel +__attribute__((reqd_work_group_size(WG_SIZE,1,1))) +void ScatterKernel(__global SortData* sortData, + __global SortData* sortDataOut, + __global u32* scannedHistogram, + ConstBuffer cb) +{ + __local u32 ldsCurrentLocation[256]; + + int lIdx = GET_LOCAL_IDX; + int gIdx = GET_GLOBAL_IDX; + + { + ldsCurrentLocation[lIdx] = scannedHistogram[lIdx*cb.m_numGroups+GET_GROUP_IDX]; + ldsCurrentLocation[lIdx+128] = scannedHistogram[(lIdx+128)*cb.m_numGroups+GET_GROUP_IDX]; + } + + GROUP_LDS_BARRIER; + + SortData datas[NUM_PER_WI]; + int keys[NUM_PER_WI]; + datas[0] = sortData[gIdx*NUM_PER_WI+0]; + datas[1] = sortData[gIdx*NUM_PER_WI+1]; + datas[2] = sortData[gIdx*NUM_PER_WI+2]; + datas[3] = sortData[gIdx*NUM_PER_WI+3]; + + keys[0] = (datas[0].m_key >> cb.m_startBit) & 0xff; + keys[1] = (datas[1].m_key >> cb.m_startBit) & 0xff; + keys[2] = (datas[2].m_key >> cb.m_startBit) & 0xff; + keys[3] = (datas[3].m_key >> cb.m_startBit) & 0xff; + + int dst[NUM_PER_WI]; + for(int i=0; i sortData : register( t0 ); +RWStructuredBuffer ldsHistogramOut : register( u0 ); + +groupshared u32 ldsHistogram[16][256]; + +[numthreads(WG_SIZE, 1, 1)] +void LocalCountKernel( DEFAULT_ARGS ) +{ + int lIdx = GET_LOCAL_IDX; + int gIdx = GET_GLOBAL_IDX; + + for(int i=0; i<16; i++) + { + ldsHistogram[i][lIdx] = 0.f; + ldsHistogram[i][lIdx+128] = 0.f; + } + + GROUP_LDS_BARRIER; + + SortData datas[NUM_PER_WI]; + datas[0] = sortData[gIdx*NUM_PER_WI+0]; + datas[1] = sortData[gIdx*NUM_PER_WI+1]; + datas[2] = 
sortData[gIdx*NUM_PER_WI+2]; + datas[3] = sortData[gIdx*NUM_PER_WI+3]; + + datas[0].m_key = (datas[0].m_key >> m_startBit) & 0xff; + datas[1].m_key = (datas[1].m_key >> m_startBit) & 0xff; + datas[2].m_key = (datas[2].m_key >> m_startBit) & 0xff; + datas[3].m_key = (datas[3].m_key >> m_startBit) & 0xff; + + int tableIdx = lIdx%16; + + AtomInc(ldsHistogram[tableIdx][datas[0].m_key]); + AtomInc(ldsHistogram[tableIdx][datas[1].m_key]); + AtomInc(ldsHistogram[tableIdx][datas[2].m_key]); + AtomInc(ldsHistogram[tableIdx][datas[3].m_key]); + + GROUP_LDS_BARRIER; + + u32 sum0, sum1; + sum0 = sum1 = 0; + for(int i=0; i<16; i++) + { + sum0 += ldsHistogram[i][lIdx]; + sum1 += ldsHistogram[i][lIdx+128]; + } + + ldsHistogramOut[lIdx*m_numGroups+GET_GROUP_IDX] = sum0; + ldsHistogramOut[(lIdx+128)*m_numGroups+GET_GROUP_IDX] = sum1; +} + + +RWStructuredBuffer sortDataOut : register( u0 ); +RWStructuredBuffer scannedHistogram : register( u1 ); + +groupshared u32 ldsCurrentLocation[256]; + +[numthreads(WG_SIZE, 1, 1)] +void ScatterKernel( DEFAULT_ARGS ) +{ + int lIdx = GET_LOCAL_IDX; + int gIdx = GET_GLOBAL_IDX; + + { + ldsCurrentLocation[lIdx] = scannedHistogram[lIdx*m_numGroups+GET_GROUP_IDX]; + ldsCurrentLocation[lIdx+128] = scannedHistogram[(lIdx+128)*m_numGroups+GET_GROUP_IDX]; + } + + GROUP_LDS_BARRIER; + + SortData datas[NUM_PER_WI]; + int keys[NUM_PER_WI]; + datas[0] = sortData[gIdx*NUM_PER_WI+0]; + datas[1] = sortData[gIdx*NUM_PER_WI+1]; + datas[2] = sortData[gIdx*NUM_PER_WI+2]; + datas[3] = sortData[gIdx*NUM_PER_WI+3]; + + keys[0] = (datas[0].m_key >> m_startBit) & 0xff; + keys[1] = (datas[1].m_key >> m_startBit) & 0xff; + keys[2] = (datas[2].m_key >> m_startBit) & 0xff; + keys[3] = (datas[3].m_key >> m_startBit) & 0xff; + + int dst[NUM_PER_WI]; + for(int i=0; i> cb.m_startBit) & 0xff;\n" +" datas[1].m_key = (datas[1].m_key >> cb.m_startBit) & 0xff;\n" +" datas[2].m_key = (datas[2].m_key >> cb.m_startBit) & 0xff;\n" +" datas[3].m_key = (datas[3].m_key >> cb.m_startBit) & 
0xff;\n" +"\n" +" int tableIdx = lIdx%16;\n" +" \n" +" AtomInc(ldsHistogram[tableIdx][datas[0].m_key]);\n" +" AtomInc(ldsHistogram[tableIdx][datas[1].m_key]);\n" +" AtomInc(ldsHistogram[tableIdx][datas[2].m_key]);\n" +" AtomInc(ldsHistogram[tableIdx][datas[3].m_key]);\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" u32 sum0, sum1;\n" +" sum0 = sum1 = 0;\n" +" for(int i=0; i<16; i++)\n" +" {\n" +" sum0 += ldsHistogram[i][lIdx];\n" +" sum1 += ldsHistogram[i][lIdx+128];\n" +" }\n" +"\n" +" ldsHistogramOut[lIdx*cb.m_numGroups+GET_GROUP_IDX] = sum0;\n" +" ldsHistogramOut[(lIdx+128)*cb.m_numGroups+GET_GROUP_IDX] = sum1;\n" +"}\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" +"void ScatterKernel(__global SortData* sortData,\n" +" __global SortData* sortDataOut,\n" +" __global u32* scannedHistogram, \n" +" ConstBuffer cb)\n" +"{\n" +" __local u32 ldsCurrentLocation[256];\n" +"\n" +" int lIdx = GET_LOCAL_IDX;\n" +" int gIdx = GET_GLOBAL_IDX;\n" +" \n" +" {\n" +" ldsCurrentLocation[lIdx] = scannedHistogram[lIdx*cb.m_numGroups+GET_GROUP_IDX];\n" +" ldsCurrentLocation[lIdx+128] = scannedHistogram[(lIdx+128)*cb.m_numGroups+GET_GROUP_IDX];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" SortData datas[NUM_PER_WI];\n" +" int keys[NUM_PER_WI];\n" +" datas[0] = sortData[gIdx*NUM_PER_WI+0];\n" +" datas[1] = sortData[gIdx*NUM_PER_WI+1];\n" +" datas[2] = sortData[gIdx*NUM_PER_WI+2];\n" +" datas[3] = sortData[gIdx*NUM_PER_WI+3];\n" +"\n" +" keys[0] = (datas[0].m_key >> cb.m_startBit) & 0xff;\n" +" keys[1] = (datas[1].m_key >> cb.m_startBit) & 0xff;\n" +" keys[2] = (datas[2].m_key >> cb.m_startBit) & 0xff;\n" +" keys[3] = (datas[3].m_key >> cb.m_startBit) & 0xff;\n" +"\n" +" int dst[NUM_PER_WI];\n" +" for(int i=0; i sortData : register( t0 );\n" +"RWStructuredBuffer ldsHistogramOut : register( u0 );\n" +"\n" +"groupshared u32 ldsHistogram[16][256];\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void LocalCountKernel( DEFAULT_ARGS )\n" +"{\n" +" int lIdx = 
GET_LOCAL_IDX;\n" +" int gIdx = GET_GLOBAL_IDX;\n" +" \n" +" for(int i=0; i<16; i++)\n" +" {\n" +" ldsHistogram[i][lIdx] = 0.f;\n" +" ldsHistogram[i][lIdx+128] = 0.f;\n" +" }\n" +" \n" +" GROUP_LDS_BARRIER;\n" +" \n" +" SortData datas[NUM_PER_WI];\n" +" datas[0] = sortData[gIdx*NUM_PER_WI+0];\n" +" datas[1] = sortData[gIdx*NUM_PER_WI+1];\n" +" datas[2] = sortData[gIdx*NUM_PER_WI+2];\n" +" datas[3] = sortData[gIdx*NUM_PER_WI+3];\n" +"\n" +" datas[0].m_key = (datas[0].m_key >> m_startBit) & 0xff;\n" +" datas[1].m_key = (datas[1].m_key >> m_startBit) & 0xff;\n" +" datas[2].m_key = (datas[2].m_key >> m_startBit) & 0xff;\n" +" datas[3].m_key = (datas[3].m_key >> m_startBit) & 0xff;\n" +"\n" +" int tableIdx = lIdx%16;\n" +" \n" +" AtomInc(ldsHistogram[tableIdx][datas[0].m_key]);\n" +" AtomInc(ldsHistogram[tableIdx][datas[1].m_key]);\n" +" AtomInc(ldsHistogram[tableIdx][datas[2].m_key]);\n" +" AtomInc(ldsHistogram[tableIdx][datas[3].m_key]);\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" u32 sum0, sum1;\n" +" sum0 = sum1 = 0;\n" +" for(int i=0; i<16; i++)\n" +" {\n" +" sum0 += ldsHistogram[i][lIdx];\n" +" sum1 += ldsHistogram[i][lIdx+128];\n" +" }\n" +"\n" +" ldsHistogramOut[lIdx*m_numGroups+GET_GROUP_IDX] = sum0;\n" +" ldsHistogramOut[(lIdx+128)*m_numGroups+GET_GROUP_IDX] = sum1;\n" +"}\n" +"\n" +"\n" +"RWStructuredBuffer sortDataOut : register( u0 );\n" +"RWStructuredBuffer scannedHistogram : register( u1 );\n" +"\n" +"groupshared u32 ldsCurrentLocation[256];\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void ScatterKernel( DEFAULT_ARGS )\n" +"{\n" +" int lIdx = GET_LOCAL_IDX;\n" +" int gIdx = GET_GLOBAL_IDX;\n" +" \n" +" {\n" +" ldsCurrentLocation[lIdx] = scannedHistogram[lIdx*m_numGroups+GET_GROUP_IDX];\n" +" ldsCurrentLocation[lIdx+128] = scannedHistogram[(lIdx+128)*m_numGroups+GET_GROUP_IDX];\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +" \n" +" SortData datas[NUM_PER_WI];\n" +" int keys[NUM_PER_WI];\n" +" datas[0] = sortData[gIdx*NUM_PER_WI+0];\n" +" datas[1] = 
sortData[gIdx*NUM_PER_WI+1];\n" +" datas[2] = sortData[gIdx*NUM_PER_WI+2];\n" +" datas[3] = sortData[gIdx*NUM_PER_WI+3];\n" +"\n" +" keys[0] = (datas[0].m_key >> m_startBit) & 0xff;\n" +" keys[1] = (datas[1].m_key >> m_startBit) & 0xff;\n" +" keys[2] = (datas[2].m_key >> m_startBit) & 0xff;\n" +" keys[3] = (datas[3].m_key >> m_startBit) & 0xff;\n" +"\n" +" int dst[NUM_PER_WI];\n" +" for(int i=0; i +#include + +template +class RadixSortStandard : public RadixSortBase +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + enum + { + WG_SIZE = 128, + NUM_PER_WI = 4, + + BITS_PER_PASS = 4, + }; + + struct Data : public RadixSort::Data + { + Kernel* m_localSortKernel; + Kernel* m_scatterKernel; + Kernel* m_copyKernel; + + Buffer* m_workBuffer0; + Buffer* m_workBuffer1; + Buffer* m_workBuffer2; + Buffer* m_workBuffer3; + Buffer* m_constBuffer[32/BITS_PER_PASS]; + }; + + + static + Data* allocate(const Device* deviceData, int maxSize, Option option = SORT_NORMAL); + + static + void deallocate(void* data); + + static + void execute(void* data, Buffer& inout, int n, int sortBits); +}; + +template +typename RadixSortStandard::Data* RadixSortStandard::allocate(const Device* deviceData, int maxSize, Option option) +{ + ADLASSERT( type == deviceData->m_type ); + + u32 maxNumGroups = (maxSize+WG_SIZE*NUM_PER_WI-1)/(WG_SIZE*NUM_PER_WI); + + const char* src[] = +#if defined(ADL_LOAD_KERNEL_FROM_STRING) + {radixSortStandardKernelsCL,radixSortStandardKernelsDX11}; +// ADLASSERT(0); +#else + {0,0}; +#endif + + Data* data = new Data; + data->m_option = option; + data->m_deviceData = deviceData; + + data->m_localSortKernel = deviceData->getKernel( PATH, KERNEL0, 0, src[type] ); + data->m_scatterKernel = deviceData->getKernel( PATH, KERNEL1, 0, src[type] ); + data->m_copyKernel = deviceData->getKernel( PATH, KERNEL2, 0, src[type] ); + + // is this correct? 
+ data->m_scanData = PrefixScan::allocate( deviceData, maxNumGroups*(1<m_workBuffer0 = new Buffer( deviceData, maxNumGroups*(1<m_workBuffer1 = new Buffer( deviceData, maxNumGroups*(1<m_workBuffer2 = new Buffer( deviceData, maxNumGroups*(1<m_workBuffer3 = new Buffer( deviceData, maxSize ); + for(int i=0; i<32/BITS_PER_PASS; i++) + data->m_constBuffer[i] = new Buffer( deviceData, 1, BufferBase::BUFFER_CONST ); + data->m_maxSize = maxSize; + + return data; +} + +template +void RadixSortStandard::deallocate(void* rawData) +{ + Data* data = (Data*)rawData; + + delete data->m_workBuffer0; + delete data->m_workBuffer1; + delete data->m_workBuffer2; + delete data->m_workBuffer3; + for(int i=0; i<32/BITS_PER_PASS; i++) + delete data->m_constBuffer[i]; + + PrefixScan::deallocate( data->m_scanData ); + + delete data; +} + +template +void RadixSortStandard::execute(void* rawData, Buffer& inout, int n, int sortBits) +{ + Data* data = (Data*)rawData; + + ADLASSERT( n%512 == 0 ); + ADLASSERT( n <= data->m_maxSize ); + ADLASSERT( NUM_PER_WI == 4 ); + + Buffer* src = BufferUtils::map( data->m_deviceData, &inout ); + Buffer* dst = data->m_workBuffer3; + + const Device* deviceData = data->m_deviceData; + + int numGroups = (n+WG_SIZE*NUM_PER_WI-1)/(WG_SIZE*NUM_PER_WI); + + int4 constBuffer; + + int iPass = 0; + for(int startBit=0; startBitm_workBuffer0 ), BufferInfo( data->m_workBuffer1 ) }; + + Launcher launcher( deviceData, data->m_localSortKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[iPass], constBuffer ); + launcher.launch1D( WG_SIZE*numGroups, WG_SIZE ); + } + + PrefixScan::execute( data->m_scanData, *data->m_workBuffer0, *data->m_workBuffer2, numGroups*(1<m_workBuffer2, true ), BufferInfo( data->m_workBuffer1, true ), + BufferInfo( dst ) }; + + Launcher launcher( deviceData, data->m_scatterKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( 
*data->m_constBuffer[iPass], constBuffer ); + launcher.launch1D( WG_SIZE*numGroups, WG_SIZE ); + } + + if(0) + { + BufferInfo bInfo[] = { BufferInfo( dst, true ), BufferInfo( src ) }; + + Launcher launcher( deviceData, data->m_copyKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.launch1D( n, WG_SIZE ); + } + swap2( src, dst ); + } + + if( src != &inout ) + { + BufferInfo bInfo[] = { BufferInfo( src, true ), BufferInfo( dst ) }; + + Launcher launcher( deviceData, data->m_copyKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.launch1D( n, WG_SIZE ); + } + + BufferUtils::unmap( src, &inout ); +} + +#undef PATH +#undef KERNEL0 +#undef KERNEL1 +#undef KERNEL2 diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernels.cl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernels.cl new file mode 100644 index 000000000..c79348d60 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernels.cl @@ -0,0 +1,345 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Author Takahiro Harada + + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable + +typedef unsigned int u32; +#define GET_GROUP_IDX get_group_id(0) +#define GET_LOCAL_IDX get_local_id(0) +#define GET_GLOBAL_IDX get_global_id(0) +#define GET_GROUP_SIZE get_local_size(0) +#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) +#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) +#define AtomInc(x) atom_inc(&(x)) +#define AtomInc1(x, out) out = atom_inc(&(x)) + +#define make_uint4 (uint4) +#define make_uint2 (uint2) + +#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) + +#define WG_SIZE 128 +#define NUM_PER_WI 4 + + +typedef struct +{ + u32 m_key; + u32 m_value; +}SortData; + + +typedef struct +{ + u32 m_startBit; + u32 m_numGroups; + u32 m_padding[2]; +} ConstBuffer; + +#define BITS_PER_PASS 4 + + + +uint4 prefixScanVector( uint4 data ) +{ + data.y += data.x; + data.w += data.z; + data.z += data.y; + data.w += data.y; + return data; +} + +uint prefixScanVectorEx( uint4* data ) +{ + uint4 backup = data[0]; + data[0].y += data[0].x; + data[0].w += data[0].z; + data[0].z += data[0].y; + data[0].w += data[0].y; + uint sum = data[0].w; + *data -= backup; + return sum; +} + +uint4 localPrefixSum128V( uint4 pData, uint lIdx, uint* totalSum, __local u32 sorterSharedMemory[] ) +{ + { // Set data + sorterSharedMemory[lIdx] = 0; + sorterSharedMemory[lIdx+WG_SIZE] = prefixScanVectorEx( &pData ); + } + + GROUP_LDS_BARRIER; + + { // Prefix sum + int idx = 2*lIdx + (WG_SIZE+1); + if( lIdx < 64 ) + { + sorterSharedMemory[idx] += sorterSharedMemory[idx-1]; + GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-2]; + GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-4]; + 
GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-8]; + GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-16]; + GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-32]; + GROUP_MEM_FENCE; + sorterSharedMemory[idx] += sorterSharedMemory[idx-64]; + GROUP_MEM_FENCE; + + sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2]; + GROUP_MEM_FENCE; + } + } + + GROUP_LDS_BARRIER; + + *totalSum = sorterSharedMemory[WG_SIZE*2-1]; + uint addValue = sorterSharedMemory[lIdx+127]; + return pData + make_uint4(addValue, addValue, addValue, addValue); +} + + +void generateHistogram(u32 lIdx, u32 wgIdx, + uint4 sortedData, + __local u32 *histogram) +{ + if( lIdx < (1<>cb.m_startBit, sortData[1].m_key>>cb.m_startBit, + sortData[2].m_key>>cb.m_startBit, sortData[3].m_key>>cb.m_startBit ); + + generateHistogram( lIdx, wgIdx, localKeys, ldsSortData ); + + GROUP_LDS_BARRIER; + + int nBins = (1<>cb.m_startBit)&cmpValue, (sortData[1].m_key>>cb.m_startBit)&cmpValue, + (sortData[2].m_key>>cb.m_startBit)&cmpValue, (sortData[3].m_key>>cb.m_startBit)&cmpValue );; + + // data is already sorted. 
So simply subtract local prefix sum + uint4 dstAddr; + dstAddr.x = ldsGlobalHistogram[radix.x] + (localAddr.x - ldsLocalHistogram[radix.x]); + dstAddr.y = ldsGlobalHistogram[radix.y] + (localAddr.y - ldsLocalHistogram[radix.y]); + dstAddr.z = ldsGlobalHistogram[radix.z] + (localAddr.z - ldsLocalHistogram[radix.z]); + dstAddr.w = ldsGlobalHistogram[radix.w] + (localAddr.w - ldsLocalHistogram[radix.w]); + + dst[dstAddr.x] = sortData[0]; + dst[dstAddr.y] = sortData[1]; + dst[dstAddr.z] = sortData[2]; + dst[dstAddr.w] = sortData[3]; +} + +__kernel +__attribute__((reqd_work_group_size(WG_SIZE,1,1))) +void CopyKernel(__global SortData *src, __global SortData *dst) +{ + dst[ GET_GLOBAL_IDX ] = src[ GET_GLOBAL_IDX ]; +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernels.hlsl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernels.hlsl new file mode 100644 index 000000000..55a6a1ca7 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernels.hlsl @@ -0,0 +1,322 @@ +/* + 2011 Takahiro Harada +*/ + +typedef uint u32; + +#define GET_GROUP_IDX groupIdx.x +#define GET_LOCAL_IDX localIdx.x +#define GET_GLOBAL_IDX globalIdx.x +#define GROUP_LDS_BARRIER GroupMemoryBarrierWithGroupSync() +#define GROUP_MEM_FENCE +#define DEFAULT_ARGS uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID +#define AtomInc(x) InterlockedAdd(x, 1) +#define AtomInc1(x, out) InterlockedAdd(x, 1, out) + +#define make_uint4 uint4 +#define make_uint2 uint2 + +uint4 SELECT_UINT4(uint4 b,uint4 a,uint4 condition ){ return make_uint4( ((condition).x)?a.x:b.x, ((condition).y)?a.y:b.y, ((condition).z)?a.z:b.z, ((condition).w)?a.w:b.w ); } + +// takahiro end +#define WG_SIZE 128 +#define NUM_PER_WI 4 + +#define GET_GROUP_SIZE WG_SIZE + +typedef struct +{ + u32 m_key; + u32 m_value; +}SortData; + +cbuffer SortCB : register( b0 ) +{ + 
u32 m_startBit; + u32 m_numGroups; + u32 m_padding[2]; +}; + +#define BITS_PER_PASS 4 + + +uint4 prefixScanVector( uint4 data ) +{ + data.y += data.x; + data.w += data.z; + data.z += data.y; + data.w += data.y; + return data; +} + +uint prefixScanVectorEx( inout uint4 data ) +{ + uint4 backup = data; + data.y += data.x; + data.w += data.z; + data.z += data.y; + data.w += data.y; + uint sum = data.w; + data -= backup; + return sum; +} + + + +RWStructuredBuffer sortDataIn : register( u0 ); +RWStructuredBuffer ldsHistogramOut0 : register( u1 ); +RWStructuredBuffer ldsHistogramOut1 : register( u2 ); + +groupshared u32 ldsSortData[ WG_SIZE*NUM_PER_WI + 16 ]; + + +uint4 localPrefixSum128V( uint4 pData, uint lIdx, inout uint totalSum ) +{ + { // Set data + ldsSortData[lIdx] = 0; + ldsSortData[lIdx+WG_SIZE] = prefixScanVectorEx( pData ); + } + + GROUP_LDS_BARRIER; + + { // Prefix sum + int idx = 2*lIdx + (WG_SIZE+1); + if( lIdx < 64 ) + { + ldsSortData[idx] += ldsSortData[idx-1]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-2]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-4]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-8]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-16]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-32]; + GROUP_MEM_FENCE; + ldsSortData[idx] += ldsSortData[idx-64]; + GROUP_MEM_FENCE; + + ldsSortData[idx-1] += ldsSortData[idx-2]; + GROUP_MEM_FENCE; + } + } + + GROUP_LDS_BARRIER; + + totalSum = ldsSortData[WG_SIZE*2-1]; + uint addValue = ldsSortData[lIdx+127]; + return pData + make_uint4(addValue, addValue, addValue, addValue); +} + +void generateHistogram(u32 lIdx, u32 wgIdx, + uint4 sortedData) +{ + if( lIdx < (1<>m_startBit, sortData[1].m_key>>m_startBit, + sortData[2].m_key>>m_startBit, sortData[3].m_key>>m_startBit ); + + generateHistogram( lIdx, wgIdx, localKeys ); + + GROUP_LDS_BARRIER; + + int nBins = (1< src : register( t0 ); +StructuredBuffer histogramGlobalRadixMajor : register( t1 ); 
+StructuredBuffer histogramLocalGroupMajor : register( t2 ); + +RWStructuredBuffer dst : register( u0 ); + +groupshared u32 ldsLocalHistogram[ 2*(1<>m_startBit)&cmpValue, (sortData[1].m_key>>m_startBit)&cmpValue, + (sortData[2].m_key>>m_startBit)&cmpValue, (sortData[3].m_key>>m_startBit)&cmpValue );; + + // data is already sorted. So simply subtract local prefix sum + uint4 dstAddr; + dstAddr.x = ldsGlobalHistogram[radix.x] + (localAddr.x - ldsLocalHistogram[radix.x]); + dstAddr.y = ldsGlobalHistogram[radix.y] + (localAddr.y - ldsLocalHistogram[radix.y]); + dstAddr.z = ldsGlobalHistogram[radix.z] + (localAddr.z - ldsLocalHistogram[radix.z]); + dstAddr.w = ldsGlobalHistogram[radix.w] + (localAddr.w - ldsLocalHistogram[radix.w]); + + dst[dstAddr.x] = sortData[0]; + dst[dstAddr.y] = sortData[1]; + dst[dstAddr.z] = sortData[2]; + dst[dstAddr.w] = sortData[3]; +} + +[numthreads(WG_SIZE, 1, 1)] +void CopyKernel( DEFAULT_ARGS ) +{ + dst[ GET_GLOBAL_IDX ] = src[ GET_GLOBAL_IDX ]; +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernelsCL.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernelsCL.h new file mode 100644 index 000000000..e793c7e94 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernelsCL.h @@ -0,0 +1,347 @@ +static const char* radixSortStandardKernelsCL= \ +"/*\n" +"Bullet Continuous Collision Detection and Physics Library\n" +"Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org\n" +"\n" +"This software is provided 'as-is', without any express or implied warranty.\n" +"In no event will the authors be held liable for any damages arising from the use of this software.\n" +"Permission is granted to anyone to use this software for any purpose, \n" +"including commercial applications, and to alter it and redistribute it freely, \n" +"subject to the following restrictions:\n" +"\n" +"1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" +"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" +"3. This notice may not be removed or altered from any source distribution.\n" +"*/\n" +"//Author Takahiro Harada\n" +"\n" +"\n" +"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +"\n" +"typedef unsigned int u32;\n" +"#define GET_GROUP_IDX get_group_id(0)\n" +"#define GET_LOCAL_IDX get_local_id(0)\n" +"#define GET_GLOBAL_IDX get_global_id(0)\n" +"#define GET_GROUP_SIZE get_local_size(0)\n" +"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +"#define AtomInc(x) atom_inc(&(x))\n" +"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +"\n" +"#define make_uint4 (uint4)\n" +"#define make_uint2 (uint2)\n" +"\n" +"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" +"\n" +"#define WG_SIZE 128\n" +"#define NUM_PER_WI 4\n" +"\n" +"\n" +"typedef struct\n" +"{\n" +" u32 m_key; \n" +" u32 m_value;\n" +"}SortData;\n" +"\n" +"\n" +"typedef struct\n" +"{\n" +" u32 m_startBit;\n" +" u32 m_numGroups;\n" +" u32 m_padding[2];\n" +"} ConstBuffer;\n" +"\n" +"#define BITS_PER_PASS 4\n" +"\n" +"\n" +"\n" +"uint4 prefixScanVector( uint4 data )\n" +"{\n" +" data.y += data.x;\n" +" data.w += data.z;\n" +" data.z += data.y;\n" +" data.w += data.y;\n" +" return data;\n" +"}\n" +"\n" +"uint prefixScanVectorEx( uint4* data )\n" +"{\n" +" uint4 backup = data[0];\n" +" data[0].y += data[0].x;\n" +" data[0].w += data[0].z;\n" +" data[0].z += data[0].y;\n" +" data[0].w += data[0].y;\n" +" uint sum = data[0].w;\n" +" *data -= backup;\n" +" return sum;\n" +"}\n" +"\n" +"uint4 
localPrefixSum128V( uint4 pData, uint lIdx, uint* totalSum, __local u32 sorterSharedMemory[] )\n" +"{\n" +" { // Set data\n" +" sorterSharedMemory[lIdx] = 0;\n" +" sorterSharedMemory[lIdx+WG_SIZE] = prefixScanVectorEx( &pData );\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" { // Prefix sum\n" +" int idx = 2*lIdx + (WG_SIZE+1);\n" +" if( lIdx < 64 )\n" +" {\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-1];\n" +" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-2]; \n" +" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-4];\n" +" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-8];\n" +" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-16];\n" +" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-32]; \n" +" GROUP_MEM_FENCE;\n" +" sorterSharedMemory[idx] += sorterSharedMemory[idx-64];\n" +" GROUP_MEM_FENCE;\n" +"\n" +" sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n" +" GROUP_MEM_FENCE;\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" *totalSum = sorterSharedMemory[WG_SIZE*2-1];\n" +" uint addValue = sorterSharedMemory[lIdx+127];\n" +" return pData + make_uint4(addValue, addValue, addValue, addValue);\n" +"}\n" +"\n" +"\n" +"void generateHistogram(u32 lIdx, u32 wgIdx, \n" +" uint4 sortedData,\n" +" __local u32 *histogram)\n" +"{\n" +" if( lIdx < (1<>cb.m_startBit, sortData[1].m_key>>cb.m_startBit, \n" +" sortData[2].m_key>>cb.m_startBit, sortData[3].m_key>>cb.m_startBit );\n" +"\n" +" generateHistogram( lIdx, wgIdx, localKeys, ldsSortData );\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" int nBins = (1<>cb.m_startBit)&cmpValue, (sortData[1].m_key>>cb.m_startBit)&cmpValue, \n" +" (sortData[2].m_key>>cb.m_startBit)&cmpValue, (sortData[3].m_key>>cb.m_startBit)&cmpValue );;\n" +"\n" +" // data is already sorted. 
So simply subtract local prefix sum\n" +" uint4 dstAddr;\n" +" dstAddr.x = ldsGlobalHistogram[radix.x] + (localAddr.x - ldsLocalHistogram[radix.x]);\n" +" dstAddr.y = ldsGlobalHistogram[radix.y] + (localAddr.y - ldsLocalHistogram[radix.y]);\n" +" dstAddr.z = ldsGlobalHistogram[radix.z] + (localAddr.z - ldsLocalHistogram[radix.z]);\n" +" dstAddr.w = ldsGlobalHistogram[radix.w] + (localAddr.w - ldsLocalHistogram[radix.w]);\n" +"\n" +" dst[dstAddr.x] = sortData[0];\n" +" dst[dstAddr.y] = sortData[1];\n" +" dst[dstAddr.z] = sortData[2];\n" +" dst[dstAddr.w] = sortData[3];\n" +"}\n" +"\n" +"__kernel\n" +"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" +"void CopyKernel(__global SortData *src, __global SortData *dst)\n" +"{\n" +" dst[ GET_GLOBAL_IDX ] = src[ GET_GLOBAL_IDX ];\n" +"}\n" +; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernelsDX11.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernelsDX11.h new file mode 100644 index 000000000..1a919ed18 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/RadixSortStandardKernelsDX11.h @@ -0,0 +1,324 @@ +static const char* radixSortStandardKernelsDX11= \ +"/*\n" +" 2011 Takahiro Harada\n" +"*/\n" +"\n" +"typedef uint u32;\n" +"\n" +"#define GET_GROUP_IDX groupIdx.x\n" +"#define GET_LOCAL_IDX localIdx.x\n" +"#define GET_GLOBAL_IDX globalIdx.x\n" +"#define GROUP_LDS_BARRIER GroupMemoryBarrierWithGroupSync()\n" +"#define GROUP_MEM_FENCE\n" +"#define DEFAULT_ARGS uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID\n" +"#define AtomInc(x) InterlockedAdd(x, 1)\n" +"#define AtomInc1(x, out) InterlockedAdd(x, 1, out)\n" +"\n" +"#define make_uint4 uint4\n" +"#define make_uint2 uint2\n" +"\n" +"uint4 SELECT_UINT4(uint4 b,uint4 a,uint4 condition ){ return make_uint4( ((condition).x)?a.x:b.x, ((condition).y)?a.y:b.y, ((condition).z)?a.z:b.z, 
((condition).w)?a.w:b.w ); }\n" +"\n" +"// takahiro end\n" +"#define WG_SIZE 128\n" +"#define NUM_PER_WI 4\n" +"\n" +"#define GET_GROUP_SIZE WG_SIZE\n" +"\n" +"typedef struct\n" +"{\n" +" u32 m_key; \n" +" u32 m_value;\n" +"}SortData;\n" +"\n" +"cbuffer SortCB : register( b0 )\n" +"{\n" +" u32 m_startBit;\n" +" u32 m_numGroups;\n" +" u32 m_padding[2];\n" +"};\n" +"\n" +"#define BITS_PER_PASS 4\n" +"\n" +"\n" +"uint4 prefixScanVector( uint4 data )\n" +"{\n" +" data.y += data.x;\n" +" data.w += data.z;\n" +" data.z += data.y;\n" +" data.w += data.y;\n" +" return data;\n" +"}\n" +"\n" +"uint prefixScanVectorEx( inout uint4 data )\n" +"{\n" +" uint4 backup = data;\n" +" data.y += data.x;\n" +" data.w += data.z;\n" +" data.z += data.y;\n" +" data.w += data.y;\n" +" uint sum = data.w;\n" +" data -= backup;\n" +" return sum;\n" +"}\n" +"\n" +"\n" +"\n" +"RWStructuredBuffer sortDataIn : register( u0 );\n" +"RWStructuredBuffer ldsHistogramOut0 : register( u1 );\n" +"RWStructuredBuffer ldsHistogramOut1 : register( u2 );\n" +"\n" +"groupshared u32 ldsSortData[ WG_SIZE*NUM_PER_WI + 16 ];\n" +"\n" +"\n" +"uint4 localPrefixSum128V( uint4 pData, uint lIdx, inout uint totalSum )\n" +"{\n" +" { // Set data\n" +" ldsSortData[lIdx] = 0;\n" +" ldsSortData[lIdx+WG_SIZE] = prefixScanVectorEx( pData );\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" { // Prefix sum\n" +" int idx = 2*lIdx + (WG_SIZE+1);\n" +" if( lIdx < 64 )\n" +" {\n" +" ldsSortData[idx] += ldsSortData[idx-1];\n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-2]; \n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-4];\n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-8];\n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-16];\n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-32]; \n" +" GROUP_MEM_FENCE;\n" +" ldsSortData[idx] += ldsSortData[idx-64];\n" +" GROUP_MEM_FENCE;\n" +"\n" +" ldsSortData[idx-1] += ldsSortData[idx-2];\n" +" 
GROUP_MEM_FENCE;\n" +" }\n" +" }\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" totalSum = ldsSortData[WG_SIZE*2-1];\n" +" uint addValue = ldsSortData[lIdx+127];\n" +" return pData + make_uint4(addValue, addValue, addValue, addValue);\n" +"}\n" +"\n" +"void generateHistogram(u32 lIdx, u32 wgIdx, \n" +" uint4 sortedData)\n" +"{\n" +" if( lIdx < (1<>m_startBit, sortData[1].m_key>>m_startBit, \n" +" sortData[2].m_key>>m_startBit, sortData[3].m_key>>m_startBit );\n" +"\n" +" generateHistogram( lIdx, wgIdx, localKeys );\n" +"\n" +" GROUP_LDS_BARRIER;\n" +"\n" +" int nBins = (1< src : register( t0 );\n" +"StructuredBuffer histogramGlobalRadixMajor : register( t1 );\n" +"StructuredBuffer histogramLocalGroupMajor : register( t2 );\n" +"\n" +"RWStructuredBuffer dst : register( u0 );\n" +"\n" +"groupshared u32 ldsLocalHistogram[ 2*(1<>m_startBit)&cmpValue, (sortData[1].m_key>>m_startBit)&cmpValue, \n" +" (sortData[2].m_key>>m_startBit)&cmpValue, (sortData[3].m_key>>m_startBit)&cmpValue );;\n" +"\n" +" // data is already sorted. 
So simply subtract local prefix sum\n" +" uint4 dstAddr;\n" +" dstAddr.x = ldsGlobalHistogram[radix.x] + (localAddr.x - ldsLocalHistogram[radix.x]);\n" +" dstAddr.y = ldsGlobalHistogram[radix.y] + (localAddr.y - ldsLocalHistogram[radix.y]);\n" +" dstAddr.z = ldsGlobalHistogram[radix.z] + (localAddr.z - ldsLocalHistogram[radix.z]);\n" +" dstAddr.w = ldsGlobalHistogram[radix.w] + (localAddr.w - ldsLocalHistogram[radix.w]);\n" +"\n" +" dst[dstAddr.x] = sortData[0];\n" +" dst[dstAddr.y] = sortData[1];\n" +" dst[dstAddr.z] = sortData[2];\n" +" dst[dstAddr.w] = sortData[3];\n" +"}\n" +"\n" +"[numthreads(WG_SIZE, 1, 1)]\n" +"void CopyKernel( DEFAULT_ARGS )\n" +"{\n" +" dst[ GET_GLOBAL_IDX ] = src[ GET_GLOBAL_IDX ];\n" +"}\n" +; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/SortData.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/SortData.h new file mode 100644 index 000000000..3d88ebdfd --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/SortData.h @@ -0,0 +1,31 @@ +/* + 2011 Takahiro Harada +*/ + +#pragma once + +#include + +namespace adl +{ + +struct SortData +{ + SortData(){} + SortData( u32 key, u32 value ) : m_key(key), m_value(value) {} + + union + { + u32 m_key; + struct { u16 m_key16[2]; }; + }; + u32 m_value; + + friend bool operator <(const SortData& a, const SortData& b) + { + return a.m_key < b.m_key; + } +}; + + +}; diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/radixsortadvanced.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/radixsortadvanced.inl new file mode 100644 index 000000000..210b5dd6b --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/radixsortadvanced.inl @@ -0,0 +1,146 @@ +/* + 2011 Takahiro Harada +*/ + +#define PATH "..\\..\\AdlPrimitives\\Sort\\RadixSortAdvancedKernels" +#define KERNEL0 "StreamCountKernel" +#define KERNEL1 "SortAndScatterKernel1" +#define KERNEL2 
"PrefixScanKernel" + +template +class RadixSortAdvanced : public RadixSortBase +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + enum + { + WG_SIZE = 128, + NUM_PER_WI = 4, + MAX_NUM_WORKGROUPS = 60, + }; + + struct Data : public RadixSort::Data + { + Kernel* m_localCountKernel; + Kernel* m_scatterKernel; + Kernel* m_scanKernel; + + Buffer* m_workBuffer0; + Buffer* m_workBuffer1; + Buffer* m_constBuffer[32/4]; + }; + + + static + Data* allocate(const Device* deviceData, int maxSize, Option option = SORT_NORMAL); + + static + void deallocate(void* data); + + static + void execute(void* data, Buffer& inout, int n, int sortBits); +}; + +template +typename RadixSortAdvanced::Data* RadixSortAdvanced::allocate(const Device* deviceData, int maxSize, Option option) +{ + ADLASSERT( type == deviceData->m_type ); + + const char* src[] = { 0, 0, 0 }; + + Data* data = new Data; + data->m_option = option; + data->m_deviceData = deviceData; + + data->m_localCountKernel = deviceData->getKernel( PATH, KERNEL0, 0, src[type] ); + data->m_scatterKernel = deviceData->getKernel( PATH, KERNEL1, 0, src[type] ); + data->m_scanKernel = deviceData->getKernel( PATH, KERNEL2, 0, src[type] ); + + data->m_workBuffer0 = new Buffer( deviceData, MAX_NUM_WORKGROUPS*16 ); + data->m_workBuffer1 = new Buffer( deviceData, maxSize ); + for(int i=0; i<32/4; i++) + data->m_constBuffer[i] = new Buffer( deviceData, 1, BufferBase::BUFFER_CONST ); + data->m_maxSize = maxSize; + + return data; +} + +template +void RadixSortAdvanced::deallocate(void* rawData) +{ + Data* data = (Data*)rawData; + + delete data->m_workBuffer0; + delete data->m_workBuffer1; + for(int i=0; i<32/4; i++) + delete data->m_constBuffer[i]; + + delete data; +} + +template +void RadixSortAdvanced::execute(void* rawData, Buffer& inout, int n, int sortBits) +{ + Data* data = (Data*)rawData; + + ADLASSERT( sortBits == 32 ); + + ADLASSERT( NUM_PER_WI == 4 ); + ADLASSERT( n%(WG_SIZE*NUM_PER_WI) == 0 ); + ADLASSERT( MAX_NUM_WORKGROUPS < 
128*8/16 ); + + Buffer* src = &inout; + Buffer* dst = data->m_workBuffer1; + + const Device* deviceData = data->m_deviceData; + + int nBlocks = n/(NUM_PER_WI*WG_SIZE); + const int nWorkGroupsToExecute = min2((int)MAX_NUM_WORKGROUPS, nBlocks); + int nBlocksPerGroup = (nBlocks+nWorkGroupsToExecute-1)/nWorkGroupsToExecute; + ADLASSERT( nWorkGroupsToExecute <= MAX_NUM_WORKGROUPS ); + + int4 constBuffer = make_int4(0, nBlocks, nWorkGroupsToExecute, nBlocksPerGroup); + + int iPass = 0; + int startBit = 0; + for(int startBit=0; startBit<32; startBit+=4, iPass++) + { + constBuffer.x = startBit; + + { + BufferInfo bInfo[] = { BufferInfo( src, true ), BufferInfo( data->m_workBuffer0 ) }; + + Launcher launcher( deviceData, data->m_localCountKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[iPass], constBuffer ); + launcher.launch1D( WG_SIZE* nWorkGroupsToExecute, WG_SIZE ); + } + + + { + BufferInfo bInfo[] = { BufferInfo( data->m_workBuffer0 ) }; + + Launcher launcher( deviceData, data->m_scanKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[iPass], constBuffer ); + launcher.launch1D( WG_SIZE, WG_SIZE ); + } + + { + BufferInfo bInfo[] = { BufferInfo( data->m_workBuffer0, true ), BufferInfo( src ), BufferInfo( dst ) }; + + Launcher launcher( deviceData, data->m_scatterKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[iPass], constBuffer ); + launcher.launch1D( WG_SIZE*nWorkGroupsToExecute, WG_SIZE ); + } + + swap2( src, dst ); + } +} + +#undef PATH +#undef KERNEL0 +#undef KERNEL1 +#undef KERNEL2 diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/radixsortsimple.inl b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/radixsortsimple.inl new file mode 100644 index 000000000..3fcab75fa --- /dev/null +++ 
b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Sort/radixsortsimple.inl @@ -0,0 +1,149 @@ +/* + 2011 Takahiro Harada +*/ + +#define PATH "..\\..\\opencl\\primitives\\AdlPrimitives\\Sort\\RadixSortSimpleKernels" +#define KERNEL0 "LocalCountKernel" +#define KERNEL1 "ScatterKernel" + +#include +#include + +template +class RadixSortSimple : public RadixSortBase +{ + public: + typedef Launcher::BufferInfo BufferInfo; + + enum + { + WG_SIZE = 128, + NUM_PER_WI = 4, + }; + + struct Data : public RadixSort::Data + { + Kernel* m_localCountKernel; + Kernel* m_scatterKernel; + + Buffer* m_workBuffer0; + Buffer* m_workBuffer1; + Buffer* m_workBuffer2; + Buffer* m_constBuffer[4]; + }; + + + static + Data* allocate(const Device* deviceData, int maxSize, Option option = SORT_NORMAL); + + static + void deallocate(void* data); + + static + void execute(void* data, Buffer& inout, int n, int sortBits); +}; + +template +typename RadixSortSimple::Data* RadixSortSimple::allocate(const Device* deviceData, int maxSize, Option option) +{ + ADLASSERT( type == deviceData->m_type ); + + const char* src[] = +#if defined(ADL_LOAD_KERNEL_FROM_STRING) + {radixSortSimpleKernelsCL, radixSortSimpleKernelsDX11}; +#else + { 0, 0 }; +#endif + u32 maxNumGroups = (maxSize+WG_SIZE*NUM_PER_WI-1)/(WG_SIZE*NUM_PER_WI); + + Data* data = new Data; + data->m_option = option; + data->m_deviceData = deviceData; + + data->m_localCountKernel = deviceData->getKernel( PATH, KERNEL0, 0, src[type] ); + data->m_scatterKernel = deviceData->getKernel( PATH, KERNEL1, 0, src[type] ); + + data->m_scanData = PrefixScan::allocate( deviceData, maxSize ); + + data->m_workBuffer0 = new Buffer( deviceData, maxNumGroups*256 ); + data->m_workBuffer1 = new Buffer( deviceData, maxNumGroups*256 ); + data->m_workBuffer2 = new Buffer( deviceData, maxSize ); + data->m_constBuffer[0] = new Buffer( deviceData, 1, BufferBase::BUFFER_CONST ); + data->m_constBuffer[1] = new Buffer( deviceData, 1, BufferBase::BUFFER_CONST ); + 
data->m_constBuffer[2] = new Buffer( deviceData, 1, BufferBase::BUFFER_CONST ); + data->m_constBuffer[3] = new Buffer( deviceData, 1, BufferBase::BUFFER_CONST ); + data->m_maxSize = maxSize; + + return data; +} + +template +void RadixSortSimple::deallocate(void* rawData) +{ + Data* data = (Data*)rawData; + + delete data->m_workBuffer0; + delete data->m_workBuffer1; + delete data->m_workBuffer2; + delete data->m_constBuffer[0]; + delete data->m_constBuffer[1]; + delete data->m_constBuffer[2]; + delete data->m_constBuffer[3]; + + PrefixScan::deallocate( data->m_scanData ); + + delete data; +} + +template +void RadixSortSimple::execute(void* rawData, Buffer& inout, int n, int sortBits) +{ + Data* data = (Data*)rawData; + + ADLASSERT( sortBits == 32 ); + ADLASSERT( n%512 == 0 ); + ADLASSERT( n <= data->m_maxSize ); + + Buffer* src = &inout; + Buffer* dst = data->m_workBuffer2; + + const Device* deviceData = data->m_deviceData; + + int numGroups = (n+WG_SIZE*NUM_PER_WI-1)/(WG_SIZE*NUM_PER_WI); + + int4 constBuffer; + + int iPass = 0; + for(int startBit=0; startBit<32; startBit+=8, iPass++) + { + constBuffer.x = startBit; + constBuffer.y = numGroups; + constBuffer.z = WG_SIZE; + + { + BufferInfo bInfo[] = { BufferInfo( src, true ), BufferInfo( data->m_workBuffer0 ) }; + + Launcher launcher( deviceData, data->m_localCountKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[iPass], constBuffer ); + launcher.launch1D( WG_SIZE*numGroups, WG_SIZE ); + } + + PrefixScan::execute( data->m_scanData, *data->m_workBuffer0, *data->m_workBuffer1, numGroups*256 ); + + { + BufferInfo bInfo[] = { BufferInfo( src, true ), BufferInfo( dst ), BufferInfo( data->m_workBuffer1 ) }; + + Launcher launcher( deviceData, data->m_scatterKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( *data->m_constBuffer[iPass], constBuffer ); + launcher.launch1D( WG_SIZE*numGroups, 
WG_SIZE ); + } + + swap2( src, dst ); + } +} + +#undef PATH +#undef KERNEL0 +#undef KERNEL1 diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/stringify.py b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/stringify.py new file mode 100644 index 000000000..e79e281e4 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/stringify.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +import sys +import os +import shutil + +arg = sys.argv[1] +fh = open(arg) + +print 'static const char* '+sys.argv[2]+'= \\' +for line in fh.readlines(): + a = line.strip('\n') + print '"'+a+'\\n"' +print ';' diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/stringifykernels.bat b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/stringifykernels.bat new file mode 100644 index 000000000..3a2aa63f1 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/stringifykernels.bat @@ -0,0 +1,22 @@ +stringify.py Fill/FillKernels.cl fillKernelsCL >Fill/FillKernelsCL.h +stringify.py Fill/FillKernels.hlsl fillKernelsDX11 >Fill/FillKernelsDX11.h +stringify.py Scan/PrefixScanKernels.cl prefixScanKernelsCL >Scan/PrefixScanKernelsCL.h +stringify.py Scan/PrefixScanKernels.hlsl prefixScanKernelsDX11 >Scan/PrefixScanKernelsDX11.h +stringify.py Search/BoundSearchKernels.cl boundSearchKernelsCL >Search/BoundSearchKernelsCL.h +stringify.py Search/BoundSearchKernels.hlsl boundSearchKernelsDX11 >Search/BoundSearchKernelsDX11.h +stringify.py Sort/RadixSortSimpleKernels.cl radixSortSimpleKernelsCL >Sort/RadixSortSimpleKernelsCL.h +stringify.py Sort/RadixSortSimpleKernels.hlsl radixSortSimpleKernelsDX11 >Sort/RadixSortSimpleKernelsDX11.h +stringify.py Sort/RadixSortStandardKernels.cl radixSortStandardKernelsCL >Sort/RadixSortStandardKernelsCL.h + +stringify.py Sort/RadixSort32Kernels.cl radixSort32KernelsCL >Sort/RadixSort32KernelsCL.h +stringify.py Sort/RadixSort32Kernels.hlsl radixSort32KernelsDX11 
>Sort/RadixSort32KernelsDX11.h + +stringify.py Copy/CopyKernels.cl copyKernelsCL >Copy/CopyKernelsCL.h +stringify.py Copy/CopyKernels.hlsl copyKernelsDX11 >Copy/CopyKernelsDX11.h + +stringify.py Sort/RadixSortStandardKernels.hlsl radixSortStandardKernelsDX11 >Sort/RadixSortStandardKernelsDX11.h +stringify.py Sort/RadixSortAdvancedKernels.hlsl radixSortAdvancedKernelsDX11 >Sort/RadixSortAdvancedKernelsDX11.h + + + + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/AMD/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/AMD/premake4.lua new file mode 100644 index 000000000..d47cec9fa --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/AMD/premake4.lua @@ -0,0 +1,31 @@ + + hasCL = findOpenCL_AMD() + hasDX11 = findDirectX11() + + if (hasCL) then + + project "OpenCL_DX11_primitives_test_AMD" + + initOpenCL_AMD() + + if (hasDX11) then + initDirectX11() + end + + language "C++" + + kind "ConsoleApp" + targetdir "../../../../bin" + includedirs {"..","../.."} + + links { + "OpenCL" + } + + files { + "../main.cpp", + "../RadixSortBenchmark.h", + "../UnitTests.h" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/Intel/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/Intel/premake4.lua new file mode 100644 index 000000000..157405f90 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/Intel/premake4.lua @@ -0,0 +1,31 @@ + + hasCL = findOpenCL_Intel() + hasDX11 = findDirectX11() + + if (hasCL) then + + project "OpenCL_DX11_primitives_test_Intel" + + initOpenCL_Intel() + + if (hasDX11) then + initDirectX11() + end + + language "C++" + + kind "ConsoleApp" + targetdir "../../../../bin" + includedirs {"..","../.."} + + links { + "OpenCL" + } + + files { + "../main.cpp", + "../RadixSortBenchmark.h", + "../UnitTests.h" + } + + end \ No newline at end of file diff --git 
a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/LaunchOverheadBenchmark.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/LaunchOverheadBenchmark.h new file mode 100644 index 000000000..4b1ae7d0f --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/LaunchOverheadBenchmark.h @@ -0,0 +1,103 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ +//Originally written by Takahiro Harada + + +#include + + + +template +__inline +void copyTest( Device* device ) +{ + int size = 65*1024; + + Buffer buf0( device, size ); + Buffer buf1( device, size ); + + Stopwatch sw( device ); + + Copy::Data* data = Copy::allocate( device ); + + for(int i=0; i<10; i++) + Copy::execute( data, buf1, buf0, size, CopyBase::PER_WI_1 ); + DeviceUtils::waitForCompletion( device ); + + { + const int nTests = 12; + + float t[nTests]; + + for(int ii=0; ii::execute( data, buf1, buf0, size, CopyBase::PER_WI_1 ); + } + DeviceUtils::waitForCompletion( device ); + sw.stop(); + + t[ii] = sw.getMs()/(float)iter; + } + + for(int ii=0; ii::deallocate( data ); +} + +void launchOverheadBenchmark() +{ + printf("LaunchOverheadBenchmark\n"); + + + Device* ddcl; +#if defined(ADL_ENABLE_DX11) + Device* dddx; +#endif + { + DeviceUtils::Config cfg; + ddcl = DeviceUtils::allocate( TYPE_CL, cfg ); +#if defined(ADL_ENABLE_DX11) + dddx = DeviceUtils::allocate( TYPE_DX11, cfg ); +#endif + } + + { + printf("CL\n"); + copyTest( ddcl ); + } +#ifdef ADL_ENABLE_DX11 + { + printf("DX11\n"); + copyTest( dddx ); + } +#endif + + +} + + +//1, 2, 4, 8, 16, 32, 64, 128, 256, + diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/NVIDIA/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/NVIDIA/premake4.lua new file mode 100644 index 000000000..e4d5cea98 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/NVIDIA/premake4.lua @@ -0,0 +1,31 @@ + + hasCL = findOpenCL_NVIDIA() + hasDX11 = findDirectX11() + + if (hasCL) then + + project "OpenCL_DX11_primitives_test_NVIDIA" + + initOpenCL_NVIDIA() + + if (hasDX11) then + initDirectX11() + end + + language "C++" + + kind "ConsoleApp" + targetdir "../../../../bin" + includedirs {"..","../.."} + + links { + "OpenCL" + } + + files { + "../main.cpp", + "../RadixSortBenchmark.h", + "../UnitTests.h" + } + + end \ No newline at end of file diff --git 
a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/RadixSortBenchmark.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/RadixSortBenchmark.h new file mode 100644 index 000000000..35404dd06 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/RadixSortBenchmark.h @@ -0,0 +1,121 @@ + +template +void run( Device* device, int minSize = 512, int maxSize = 64*1024 )//, int increment = 512 ) +{ + ADLASSERT( TYPE == device->m_type ); + + Stopwatch sw( device ); + +// RadixSort::Data* data0 = RadixSort::allocate( device, maxSize, RadixSortBase::SORT_SIMPLE ); + RadixSort::Data* data0 = RadixSort::allocate( device, maxSize, RadixSortBase::SORT_STANDARD ); + RadixSort::Data* data1 = RadixSort::allocate( device, maxSize, RadixSortBase::SORT_STANDARD ); + RadixSort::Data* data2 = RadixSort::allocate( device, maxSize, RadixSortBase::SORT_ADVANCED ); + + Buffer buf0( device, maxSize ); + Buffer buf1( device, maxSize ); + Buffer buf2( device, maxSize ); + + SortData* input = new SortData[ maxSize ]; + +// for(int iter = minSize; iter<=maxSize; iter+=increment) + for(int iter = minSize; iter<=maxSize; iter*=2) + { + int size = NEXTMULTIPLEOF( iter, 512 ); + + for(int i=0; i::execute( data0, buf0, size ); + + sw.split(); + + RadixSort::execute( data1, buf1, size ); + + sw.split(); + + RadixSort::execute( data2, buf2, size ); + + sw.stop(); + + + float t[3]; + sw.getMs( t, 3 ); +// printf(" %d %3.2f %3.2f %3.2f\n", size, t[0], t[1], t[2]); + printf(" %d %3.2f %3.2f\n", size, t[1], t[2]); + } + + RadixSort::deallocate( data0 ); + RadixSort::deallocate( data1 ); + RadixSort::deallocate( data2 ); + + delete [] input; +} + +template +void run32( Device* device, int size ) +{ + //Cayman: 4194.30Keys: 373.05MKeys/s + //Cypress: 4194.30Keys: 315.13MKeys/s + ADLASSERT( TYPE == device->m_type ); + + Stopwatch sw( device ); + + RadixSort32::Data* data = RadixSort32::allocate( device, size ); + Copy::Data* copyData = Copy::allocate( device ); + + Buffer 
inputMaster( device, size ); + Buffer input( device, size ); + Buffer output( device, size ); + { + u32* host = new u32[size]; + for(int i=0; i::execute( copyData, (Buffer&)input, (Buffer&)inputMaster, size ); +// RadixSort32::execute( data, input, size ); + RadixSort32::execute( data, input, output, size ); + } + sw.stop(); + + { + float tInS = sw.getMs()/1000.f/(float)nIter; + float mKeysPerS = size/1000.f/1000.f/tInS; + printf("%3.2fMKeys: %3.2fMKeys/s\n", size/1000.f, mKeysPerS); + } + + RadixSort32::deallocate( data ); + Copy::deallocate( copyData ); +} + +template +void radixSortBenchmark() +{ + + Device* device; + { + DeviceUtils::Config cfg; + device = DeviceUtils::allocate( TYPE, cfg ); + } + + run32( device, 256*1024*8*2 ); +// run32( device, 256*20*6 ); + +// run( device, 512, 1024*128*4 ); + + DeviceUtils::deallocate( device ); + +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/UnitTests.h b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/UnitTests.h new file mode 100644 index 000000000..a1ab2e417 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/UnitTests.h @@ -0,0 +1,801 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#include +#include +#include +#include +#include +#include + +#include + +using namespace adl; + +#define NUM_TESTS 10 + +int g_nPassed = 0; +int g_nFailed = 0; +bool g_testFailed = 0; + +//#define TEST_INIT bool g_testFailed = 0; +#define TEST_INIT g_testFailed = 0; +#define TEST_ASSERT(x) if( !(x) ){g_testFailed = 1;} +//#define TEST_ASSERT(x) if( !(x) ){g_testFailed = 1;ADLASSERT(x);} +#define TEST_REPORT(testName) printf("[%s] %s\n",(g_testFailed)?"X":"O", testName); if(g_testFailed) g_nFailed++; else g_nPassed++; + +void memCpyTest( Device* deviceData ) +{ + TEST_INIT; + int maxSize = 64*1024; + Buffer buff( deviceData, maxSize ); + + u32* hostBuff = new u32[maxSize]; + + for(int iter=0; iterquery(deviceData, ".\\Kernel", "VectorAddKernel" ); + + { + int size = 1024; + Buffer buf0( deviceData, size ); + Buffer buf1( deviceData, size ); + Buffer cBuf( deviceData, 1, BufferBase::BUFFER_CONST ); + int* hostBuf0 = new int[size]; + int* hostBuf1 = new int[size]; + for(int i=0; i*)&buf0 ), Launcher::BufferInfo( (Buffer*)&buf1, true ) }; + + Launcher launcher( deviceData, kernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); + launcher.setConst( (Buffer&)cBuf, constBuffer ); + launcher.launch1D( size ); + + buf0.read( hostBuf0, size ); + buf1.read( hostBuf1, size ); + DeviceUtils::waitForCompletion( deviceData ); + } + + for(int i=0; i +void scanTest( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer buf1( deviceHost, maxSize ); + Buffer buf2( deviceGPU, maxSize ); + Buffer buf3( deviceGPU, maxSize ); + + PrefixScan::Data* data0 = PrefixScan::allocate( deviceGPU, maxSize ); + PrefixScan::Data* data1 = PrefixScan::allocate( deviceHost, maxSize ); + + int dx = maxSize/NUM_TESTS; + 
for(int iter=0; iter::execute( data1, buf0, buf1, size, &sumHost ); + PrefixScan::execute( data0, buf2, buf3, size, &sumGPU ); + + buf3.read( buf0.m_ptr, size ); + DeviceUtils::waitForCompletion( deviceGPU ); + TEST_ASSERT( sumHost == sumGPU ); + for(int i=0; i::deallocate( data1 ); + PrefixScan::deallocate( data0 ); + + TEST_REPORT( "scanTest" ); +} + +template +bool radixSortTest( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer buf1( deviceHost, maxSize ); + Buffer buf2( deviceGPU, maxSize ); + + RadixSort::Data* dataH = RadixSort::allocate( deviceHost, maxSize, RadixSortBase::SORT_SIMPLE ); + RadixSort::Data* dataC = RadixSort::allocate( deviceGPU, maxSize, SORT_TYPE ); + + int dx = maxSize/NUM_TESTS; + for(int iter=0; iter::execute( dataH, buf0, size ); + RadixSort::execute( dataC, buf2, size ); + + buf2.read( buf1.m_ptr, size ); + DeviceUtils::waitForCompletion( deviceGPU ); + for(int i=0; i::deallocate( dataH ); + RadixSort::deallocate( dataC ); + + return g_testFailed; +} + +template +void radixSortSimpleTest( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + g_testFailed = radixSortTest(deviceGPU, deviceHost); + TEST_REPORT( "radixSortSimpleTest" ); +} + +template +void radixSortStandardTest( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + g_testFailed = radixSortTest(deviceGPU, deviceHost); + TEST_REPORT( "radixSortStandardTest" ); +} + +template +void radixSortAdvancedTest( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + g_testFailed = radixSortTest(deviceGPU, deviceHost); + TEST_REPORT( "radixSortAdvancedTest" ); +} + +template +void boundSearchTest( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + int bucketSize = 256; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer lowerH( deviceHost, maxSize ); + 
HostBuffer upperH( deviceHost, maxSize ); + + Buffer buf( deviceGPU, maxSize ); + Buffer lower( deviceGPU, maxSize ); + Buffer upper( deviceGPU, maxSize ); + + BoundSearch::Data* dataH = BoundSearch::allocate( deviceGPU ); + RadixSort::Data* dataHSort = RadixSort::allocate( deviceHost, maxSize, RadixSortBase::SORT_SIMPLE ); + + int dx = maxSize/NUM_TESTS; + for(int iter=0; iter::execute( dataHSort, buf0, size ); + buf.write( buf0.m_ptr, size ); + { + u32* host = new u32[size]; + for(int i=0; i::execute( dataH, buf, size, lower, bucketSize, BoundSearchBase::BOUND_LOWER ); + BoundSearch::execute( dataH, buf, size, upper, bucketSize, BoundSearchBase::BOUND_UPPER ); + + lower.read( lowerH.m_ptr, bucketSize ); + upper.read( upperH.m_ptr, bucketSize ); + DeviceUtils::waitForCompletion( deviceGPU ); +/* + for(u32 i=1; i<(u32)bucketSize; i++) + { + for(u32 j=lowerH[i-1]; j::deallocate( dataH ); + RadixSort::deallocate( dataHSort ); + + TEST_REPORT( "boundSearchTest" ); +} + +template +void fillIntTest( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer buf1( deviceHost, maxSize ); + Buffer buf2( deviceGPU, maxSize ); + + Fill::Data* data0 = Fill::allocate( deviceHost ); + Fill::Data* data1 = Fill::allocate( deviceGPU ); + + int dx = maxSize/NUM_TESTS; + for(int iter=0; iter::execute( data0, buf0, 12, size ); + Fill::execute( data1, buf2, 12, size ); + + buf2.read( buf1.m_ptr, size ); + DeviceUtils::waitForCompletion( deviceGPU ); + for(int i=0; i::deallocate( data0 ); + Fill::deallocate( data1 ); + + TEST_REPORT( "fillIntTest" ); +} + +template +void fillInt2Test( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer buf1( deviceHost, maxSize ); + Buffer buf2( deviceGPU, maxSize ); + + Fill::Data* data0 = 
Fill::allocate( deviceHost ); + Fill::Data* data1 = Fill::allocate( deviceGPU ); + + int dx = maxSize/NUM_TESTS; + for(int iter=0; iter::execute( data0, buf0, make_int2( 12, 12 ), size ); + Fill::execute( data1, buf2, make_int2( 12, 12 ), size ); + + buf2.read( buf1.m_ptr, size ); + DeviceUtils::waitForCompletion( deviceGPU ); + for(int i=0; i::deallocate( data0 ); + Fill::deallocate( data1 ); + + TEST_REPORT( "fillInt2Test" ); +} + +template +void fillInt4Test( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer buf1( deviceHost, maxSize ); + Buffer buf2( deviceGPU, maxSize ); + + Fill::Data* data0 = Fill::allocate( deviceHost ); + Fill::Data* data1 = Fill::allocate( deviceGPU ); + + int dx = maxSize/NUM_TESTS; + for(int iter=0; iter::execute( data0, buf0, make_int4( 12 ), size ); + Fill::execute( data1, buf2, make_int4( 12 ), size ); + + buf2.read( buf1.m_ptr, size ); + DeviceUtils::waitForCompletion( deviceGPU ); + for(int i=0; i::deallocate( data0 ); + Fill::deallocate( data1 ); + + TEST_REPORT( "fillInt4Test" ); +} + + +template +bool CopyF4Test( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer buf1( deviceHost, maxSize ); + Buffer buf2( deviceGPU, maxSize ); + Buffer buf3( deviceGPU, maxSize ); + HostBuffer devResult( deviceHost, maxSize ); + + Copy::Data* data0 = Copy::allocate( deviceHost ); + Copy::Data* data1 = Copy::allocate( deviceGPU ); + + int dx = maxSize/NUM_TESTS; + for(int iter=0; iter::execute( data0, buf1, buf0, size, OPTION ); + Copy::execute( data1, buf3, buf2, size, OPTION ); + + buf3.read( devResult.m_ptr, size ); + DeviceUtils::waitForCompletion( deviceGPU ); + for(int i=0; i::deallocate( data0 ); + Copy::deallocate( data1 ); + + return g_testFailed; +} + +template +void 
Copy1F4Test( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + g_testFailed = CopyF4Test( deviceGPU, deviceHost ); + TEST_REPORT( "Copy1F4Test" ); +} + +template +void Copy2F4Test( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + g_testFailed = CopyF4Test( deviceGPU, deviceHost ); + TEST_REPORT( "Copy2F4Test" ); +} + +template +void Copy4F4Test( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + g_testFailed = CopyF4Test( deviceGPU, deviceHost ); + TEST_REPORT( "Copy4F4Test" ); +} + + +template +void CopyF1Test( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer buf1( deviceHost, maxSize ); + Buffer buf2( deviceGPU, maxSize ); + Buffer buf3( deviceGPU, maxSize ); + HostBuffer devResult( deviceHost, maxSize ); + + Copy::Data* data0 = Copy::allocate( deviceHost ); + Copy::Data* data1 = Copy::allocate( deviceGPU ); + + int dx = maxSize/NUM_TESTS; + for(int iter=0; iter::execute( data0, buf1, buf0, size ); + Copy::execute( data1, buf3, buf2, size ); + + buf3.read( devResult.m_ptr, size ); + DeviceUtils::waitForCompletion( deviceGPU ); + for(int i=0; i::deallocate( data0 ); + Copy::deallocate( data1 ); + + TEST_REPORT( "CopyF1Test" ); +} + +template +void CopyF2Test( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer buf1( deviceHost, maxSize ); + Buffer buf2( deviceGPU, maxSize ); + Buffer buf3( deviceGPU, maxSize ); + HostBuffer devResult( deviceHost, maxSize ); + + Copy::Data* data0 = Copy::allocate( deviceHost ); + Copy::Data* data1 = Copy::allocate( deviceGPU ); + + int dx = maxSize/NUM_TESTS; + for(int iter=0; iter::execute( data0, buf1, buf0, size ); + Copy::execute( data1, buf3, buf2, size ); + + buf3.read( devResult.m_ptr, size ); + DeviceUtils::waitForCompletion( deviceGPU ); + 
for(int i=0; i::deallocate( data0 ); + Copy::deallocate( data1 ); + + TEST_REPORT( "CopyF2Test" ); +} + +template +void radixSort32Test( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer buf1( deviceHost, maxSize ); + Buffer buf2( deviceGPU, maxSize ); + + RadixSort32::Data* dataH = RadixSort32::allocate( deviceHost, maxSize ); + RadixSort32::Data* dataC = RadixSort32::allocate( deviceGPU, maxSize ); + + int dx = maxSize/NUM_TESTS; + for(int iter=0; iter::execute( dataH, buf0, size, 32 ); + RadixSort32::execute( dataC, buf2, size, 32 ); + + buf2.read( buf1.m_ptr, size ); + DeviceUtils::waitForCompletion( deviceGPU ); +// for(int i=0; i::deallocate( dataH ); + RadixSort32::deallocate( dataC ); + + TEST_REPORT( "RadixSort32Test" ); +} + +template +void radixSortKeyValue32Test( Device* deviceGPU, Device* deviceHost ) +{ + TEST_INIT; + ADLASSERT( type == deviceGPU->m_type ); + + int maxSize = 1024*256; + + // Host buffers + HostBuffer buf0( deviceHost, maxSize ); // Buffer for keys in host and will be sorted by host. + HostBuffer buf1( deviceHost, maxSize ); // Buffer for keys in host and will be saved by device after sorting in device. + HostBuffer buf2( deviceHost, maxSize ); // Buffer for values in host. This buffer is paired with buf0. + HostBuffer buf3( deviceHost, maxSize ); // Buffer for values in host and will be saved by device after sorting. It is paired with buf1. + + // Device buffers + Buffer buf4( deviceGPU, maxSize ); // Buffer for input keys for device. + Buffer buf5( deviceGPU, maxSize ); // Buffer for output keys from device and will be sorted by device. This key data will be saved to buf1 to be compared with a result(buf0) from host. + Buffer buf6( deviceGPU, maxSize ); // Buffer for input values in device. + Buffer buf7( deviceGPU, maxSize ); // Buffer for output values in device. 
+ + RadixSort32::Data* dataH = RadixSort32::allocate( deviceHost, maxSize ); + RadixSort32::Data* dataC = RadixSort32::allocate( deviceGPU, maxSize ); + + int dx = maxSize/NUM_TESTS; + + for(int iter=0; iter::execute( dataH, buf0, buf2, size, 32 ); + RadixSort32::execute( dataC, buf4, buf5, buf6, buf7, size, 32 ); + buf5.read( buf1.m_ptr, size ); + buf7.read( buf3.m_ptr, size ); + + DeviceUtils::waitForCompletion( deviceGPU ); + + for(int i=0; i::deallocate( dataH ); + RadixSort32::deallocate( dataC ); + + TEST_REPORT( "RadixSortKeyValue32Test" ); +} + +#if defined(ADL_ENABLE_DX11) + #define RUN_GPU( func ) func(ddcl); func(dddx); + #define RUN_GPU_TEMPLATE( func ) func( ddcl, ddhost ); func( dddx, ddhost ); + #define RUN_CL_TEMPLATE( func ) func( ddcl, ddhost ); +#else + #define RUN_GPU( func ) func(ddcl); + #define RUN_GPU_TEMPLATE( func ) func( ddcl, ddhost ); +#endif +#define RUN_ALL( func ) RUN_GPU( func ); func(ddhost); + +void runAllTest() +{ + g_nPassed = 0; + g_nFailed = 0; + + + Device* ddcl; + Device* ddhost; +#if defined(ADL_ENABLE_DX11) + Device* dddx; +#endif + + { + DeviceUtils::Config cfg; + + // Choose AMD or NVidia +#ifdef CL_PLATFORM_AMD + cfg.m_vendor = adl::DeviceUtils::Config::VD_AMD; +#endif + +#ifdef CL_PLATFORM_INTEL + cfg.m_vendor = adl::DeviceUtils::Config::VD_INTEL; + cfg.m_type = DeviceUtils::Config::DEVICE_CPU; +#endif + + +#ifdef CL_PLATFORM_NVIDIA + cfg.m_vendor = adl::DeviceUtils::Config::VD_NV; +#endif + + + ddcl = DeviceUtils::allocate( TYPE_CL, cfg ); + ddhost = DeviceUtils::allocate( TYPE_HOST, cfg ); +// cfg.m_type = DeviceUtils::Config::DEVICE_GPU; +#if defined(ADL_ENABLE_DX11) + dddx = DeviceUtils::allocate( TYPE_DX11, cfg ); +#endif + } + + { + char name[128]; + ddcl->getDeviceName( name ); + printf("CL: %s\n", name); +#ifdef ADL_ENABLE_DX11 + dddx->getDeviceName( name ); + printf("DX11: %s\n", name); +#endif + } + + RUN_GPU_TEMPLATE( radixSort32Test ); + RUN_GPU_TEMPLATE( radixSortKeyValue32Test ); + + if (1) + { + 
RUN_GPU_TEMPLATE( CopyF1Test ); + RUN_GPU_TEMPLATE( CopyF2Test ); + + boundSearchTest( ddhost, ddhost ); +// fillTest( ddhost, ddhost ); +// fillTest( ddcl, ddhost ); + + + + + RUN_GPU_TEMPLATE( boundSearchTest ); + + RUN_GPU_TEMPLATE( fillIntTest ); + RUN_GPU_TEMPLATE( fillInt2Test ); + RUN_GPU_TEMPLATE( fillInt4Test ); + + RUN_ALL( stopwatchTest ); + RUN_ALL( memCpyTest ); +// RUN_GPU( kernelTest ); + RUN_GPU_TEMPLATE( scanTest ); + RUN_GPU_TEMPLATE( radixSortSimpleTest ); + + RUN_GPU_TEMPLATE( radixSortStandardTest ); + + RUN_GPU_TEMPLATE( radixSort32Test ); + +// RUN_GPU_TEMPLATE( boundSearchTest ); + RUN_GPU_TEMPLATE( Copy1F4Test ); + RUN_GPU_TEMPLATE( Copy2F4Test ); + RUN_GPU_TEMPLATE( Copy4F4Test ); + } + + DeviceUtils::deallocate( ddcl ); + DeviceUtils::deallocate( ddhost ); +#if defined(ADL_ENABLE_DX11) + DeviceUtils::deallocate( dddx ); +#endif + + printf("=========\n%d Passed\n%d Failed\n", g_nPassed, g_nFailed); + + +} \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/main.cpp b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/main.cpp new file mode 100644 index 000000000..2f9eaa16b --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/main.cpp @@ -0,0 +1,118 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada + + +#include + +#include +#include + +#include "UnitTests.h" +#include "RadixSortBenchmark.h" +#include "LaunchOverheadBenchmark.h" + + +#undef NUM_TESTS + + +struct ConstBuffer +{ + float4 m_a; + float4 m_b; + float4 m_c; +}; + +int main() +{ + if(0) + { // radix sort test + Device* deviceHost; + Device* deviceGPU; + { + DeviceUtils::Config cfg; + + // Choose AMD or NVidia +#ifdef CL_PLATFORM_AMD + cfg.m_vendor = DeviceUtils::Config::VD_AMD; +#endif + +#ifdef CL_PLATFORM_INTEL + cfg.m_vendor = DeviceUtils::Config::VD_INTEL; +#endif + +#ifdef CL_PLATFORM_NVIDIA + cfg.m_vendor = adl::DeviceUtils::Config::VD_NV; +#endif + deviceGPU = DeviceUtils::allocate( TYPE_DX11, cfg ); + deviceHost = DeviceUtils::allocate( TYPE_HOST, cfg ); + } + + { + int maxSize = 512*20; + int size = maxSize; + + HostBuffer buf0( deviceHost, maxSize ); + HostBuffer buf1( deviceHost, maxSize ); + Buffer buf2( deviceGPU, maxSize ); + + RadixSort::Data* dataH = RadixSort::allocate( deviceHost, maxSize, RadixSortBase::SORT_STANDARD ); + RadixSort::Data* dataC = RadixSort::allocate( deviceGPU, maxSize, RadixSortBase::SORT_ADVANCED ); + + { + size = NEXTMULTIPLEOF( size, 512 ); + + for(int i=0; i::execute( dataH, buf0, size ); + RadixSort::execute( dataC, buf2, size ); + + buf2.read( buf1.m_ptr, size ); + DeviceUtils::waitForCompletion( deviceGPU ); + for(int i=0; i::deallocate( dataH ); + RadixSort::deallocate( dataC ); + } + + DeviceUtils::deallocate( deviceHost ); + DeviceUtils::deallocate( deviceGPU ); + } + + if(0) + { + launchOverheadBenchmark(); + } + + if(0) + { + radixSortBenchmark(); + } + + if(0) + { + radixSortBenchmark(); + } + + if(1) + { + runAllTest(); + } + printf("End, press \n"); + getchar(); +} + diff --git 
a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/premake4.lua new file mode 100644 index 000000000..2c16f4ba4 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlTest/premake4.lua @@ -0,0 +1,4 @@ + +include "AMD" +include "NVIDIA" +include "Intel" \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/AMD/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/AMD/premake4.lua new file mode 100644 index 000000000..370403738 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/AMD/premake4.lua @@ -0,0 +1,29 @@ + + hasCL = findOpenCL_AMD() + hasDX11 = findDirectX11() + + if (hasCL) then + + project "OpenCL_DX11_radixsort_benchmark_AMD" + + initOpenCL_AMD() + + if (hasDX11) then + initDirectX11() + end + + language "C++" + + kind "ConsoleApp" + targetdir "../../../../bin" + includedirs {"..","../.."} + + links { + "OpenCL" + } + + files { + "../test_large_problem_sorting.cpp" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/NVIDIA/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/NVIDIA/premake4.lua new file mode 100644 index 000000000..b959d13fc --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/NVIDIA/premake4.lua @@ -0,0 +1,29 @@ + + hasCL = findOpenCL_NVIDIA() + hasDX11 = findDirectX11() + + if (hasCL) then + + project "OpenCL_DX11_radixsort_benchmark_NVIDIA" + + initOpenCL_NVIDIA() + + if (hasDX11) then + initDirectX11() + end + + language "C++" + + kind "ConsoleApp" + targetdir "../../../../bin" + includedirs {"..","../.."} + + links { + "OpenCL" + } + + files { + "../test_large_problem_sorting.cpp" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/premake4.lua 
new file mode 100644 index 000000000..e3cf35221 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/premake4.lua @@ -0,0 +1,2 @@ +include "AMD" +include "NVIDIA" diff --git a/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/test_large_problem_sorting.cpp b/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/test_large_problem_sorting.cpp new file mode 100644 index 000000000..b1673012d --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/benchmark/test_large_problem_sorting.cpp @@ -0,0 +1,705 @@ +/****************************************************************************** + * Copyright 2010 Duane Merrill + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * + * + * + * AUTHORS' REQUEST: + * + * If you use|reference|benchmark this code, please cite our Technical + * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): + * + * @TechReport{ Merrill:Sorting:2010, + * author = "Duane Merrill and Andrew Grimshaw", + * title = "Revisiting Sorting for GPGPU Stream Architectures", + * year = "2010", + * institution = "University of Virginia, Department of Computer Science", + * address = "Charlottesville, VA, USA", + * number = "CS2010-03" + * } + * + * For more information, see our Google Code project site: + * http://code.google.com/p/back40computing/ + * + * Thanks! 
+ ******************************************************************************/ + +/****************************************************************************** + * Simple test driver program for *large-problem* radix sorting. + * + * Useful for demonstrating how to integrate radix sorting into + * your application + ******************************************************************************/ + +/****************************************************************************** + * Converted from CUDA to OpenCL/DirectCompute by Erwin Coumans + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#define BUFFERSIZE_WORKAROUND + +//#include +#include +/********************** +* +*/ + +#include "Adl/Adl.h" +#include "AdlPrimitives/Sort/RadixSort32.h" +#include "AdlPrimitives/Sort/SortData.h" + +using namespace adl; + + +/*********************** +* +*/ + +bool g_verbose; + + +/****************************************************************************** + * Routines + ******************************************************************************/ + + +/** + * Keys-only sorting. Uses the GPU to sort the specified vector of elements for the given + * number of iterations, displaying runtime information. 
+ * + * @param[in] num_elements + * Size in elements of the vector to sort + * @param[in] h_keys + * Vector of keys to sort + * @param[in] iterations + * Number of times to invoke the GPU sorting primitive + * @param[in] cfg + * Config + */ +template +void TimedSort( + unsigned int num_elements, + K *h_keys, + unsigned int iterations, const DeviceUtils::Config& cfg) +{ + std::string sType = "No type selected"; + + if (type == TYPE_CL) + sType = "OpenCL"; + else if (type == TYPE_DX11) + sType = "DX11"; + + printf("Keys-only, %s, %d iterations, %d elements\n", sType.c_str(), iterations, num_elements); + + int max_elements = num_elements; + +#ifdef BUFFERSIZE_WORKAROUND + if (max_elements < 1024*256) + max_elements = 1024*256; +#endif + + // Allocate device storage + Device* deviceData = NULL; + + if ( type == TYPE_CL ) + deviceData = new DeviceCL(); +#ifdef ADL_ENABLE_DX11 + else if ( type == TYPE_DX11 ) + deviceData = new DeviceDX11(); +#endif //ADL_ENABLE_DX11 + + deviceData->initialize(cfg); + + RadixSort32::Data* planData = RadixSort32::allocate( deviceData, max_elements); + + { + Buffer keysInOut(deviceData,max_elements); + + // Create sorting enactor + keysInOut.write(h_keys,num_elements); + DeviceUtils::waitForCompletion( deviceData); + + RadixSort32::execute( planData,keysInOut,num_elements, 32); + DeviceUtils::waitForCompletion( deviceData); + + // Perform the timed number of sorting iterations + double elapsed = 0; + float duration = 0; + StopwatchHost watch; + watch.init(deviceData); + + watch.start(); + + for (int i = 0; i < iterations; i++) + { + + // Move a fresh copy of the problem into device storage + keysInOut.write(h_keys,num_elements); + DeviceUtils::waitForCompletion( deviceData); + + // Start GPU timing record + watch.start(); + + // Call the sorting API routine + RadixSort32::execute( planData,keysInOut,num_elements, 32); + DeviceUtils::waitForCompletion( deviceData); + + watch.stop(); + duration = watch.getMs(); + + // End GPU timing record + 
elapsed += (double) duration; + } + + // Display timing information + double avg_runtime = elapsed / iterations; + // double throughput = ((double) num_elements) / avg_runtime / 1000.0 / 1000.0; + // printf(", %f GPU ms, %f x10^9 elts/sec\n", avg_runtime, throughput); + double throughput = ((double) num_elements) / avg_runtime / 1000.0 ; + printf(", %f GPU ms, %f x10^6 elts/sec\n", avg_runtime, throughput); + + // Copy out data + keysInOut.read(h_keys,num_elements); + + DeviceUtils::waitForCompletion( deviceData); + + } + // Free allocated memory + RadixSort32::deallocate( planData); + delete deviceData; + // Clean up events +} + +/** + * Key-value sorting. Uses the GPU to sort the specified vector of elements for the given + * number of iterations, displaying runtime information. + * + * @param[in] num_elements + * Size in elements of the vector to sort + * @param[in] h_keys + * Vector of keys to sort + * @param[in,out] h_values + * Vector of values to sort + * @param[in] iterations + * Number of times to invoke the GPU sorting primitive + * @param[in] cfg + * Config + */ +template +void TimedSort( + unsigned int num_elements, + K *h_keys, + V *h_values, + unsigned int iterations, const DeviceUtils::Config& cfg) +{ + std::string sType = "No type selected"; + + if (type == TYPE_CL) + sType = "OpenCL"; + else if (type == TYPE_DX11) + sType = "DX11"; + + printf("Key-values, %s, %d iterations, %d elements\n", sType.c_str(), iterations, num_elements); + + int max_elements = num_elements; + +#ifdef BUFFERSIZE_WORKAROUND + if (max_elements < 1024*256) + max_elements = 1024*256; +#endif + + // Allocate device storage + Device* deviceData = NULL; + + if ( type == TYPE_CL ) + deviceData = new DeviceCL(); +#ifdef ADL_ENABLE_DX11 + else if ( type == TYPE_DX11 ) + deviceData = new DeviceDX11(); +#endif //ADL_ENABLE_DX11 + + deviceData->initialize(cfg); + RadixSort32::Data* planData = RadixSort32::allocate( deviceData, max_elements); + { + Buffer 
keysIn(deviceData,max_elements); + Buffer valuesIn(deviceData,max_elements); + + Buffer keysOut(deviceData,max_elements); + Buffer valuesOut(deviceData,max_elements); + + //printf("Key-values, %d iterations, %d elements", iterations, num_elements); + + // Create sorting enactor + keysIn.write(h_keys,num_elements); + DeviceUtils::waitForCompletion( deviceData); + valuesIn.write(h_values,num_elements); + DeviceUtils::waitForCompletion( deviceData); + + + // Perform a single sorting iteration to allocate memory, prime code caches, etc. + //RadixSort::execute( planData, buffer, num_elements ); + + //RadixSort32::execute( planData, keysIn,keysOut, valuesIn,valuesOut, num_elements, 32); + RadixSort32::execute( planData, keysIn,keysOut, valuesIn,valuesOut, num_elements, 32); + DeviceUtils::waitForCompletion( deviceData); + + // Perform the timed number of sorting iterations + double elapsed = 0; + float duration = 0; + StopwatchHost watch; + watch.init(deviceData); + + watch.start(); + + for (int i = 0; i < iterations; i++) + { + + // Move a fresh copy of the problem into device storage + keysIn.write(h_keys,num_elements); + valuesIn.write(h_values,num_elements); + + DeviceUtils::waitForCompletion( deviceData); + + // Start GPU timing record + watch.start(); + + // Call the sorting API routine + + RadixSort32::execute( planData, keysIn,keysOut, valuesIn,valuesOut, num_elements, 32); + + DeviceUtils::waitForCompletion( deviceData); + + watch.stop(); + duration = watch.getMs(); + + // End GPU timing record + elapsed += (double) duration; + } + + // Display timing information + double avg_runtime = elapsed / iterations; + // double throughput = ((double) num_elements) / avg_runtime / 1000.0 / 1000.0; + // printf(", %f GPU ms, %f x10^9 elts/sec\n", avg_runtime, throughput); + double throughput = ((double) num_elements) / avg_runtime / 1000.0 ; + printf(", %f GPU ms, %f x10^6 elts/sec\n", avg_runtime, throughput); + + //memset(h_keys,1,num_elements); + 
//memset(h_values,1,num_elements); + // Copy out data + keysOut.read(h_keys,num_elements); + valuesOut.read(h_values,num_elements); + + DeviceUtils::waitForCompletion( deviceData); + } + + // Free allocated memory + RadixSort32::deallocate( planData); + delete deviceData; + // Clean up events + +} + + + +/** + * Generates random 32-bit keys. + * + * We always take the second-order byte from rand() because the higher-order + * bits returned by rand() are commonly considered more uniformly distributed + * than the lower-order bits. + * + * We can decrease the entropy level of keys by adopting the technique + * of Thearling and Smith in which keys are computed from the bitwise AND of + * multiple random samples: + * + * entropy_reduction | Effectively-unique bits per key + * ----------------------------------------------------- + * -1 | 0 + * 0 | 32 + * 1 | 25.95 + * 2 | 17.41 + * 3 | 10.78 + * 4 | 6.42 + * ... | ... + * + */ +template +void RandomBits(K &key, int entropy_reduction = 0, int lower_key_bits = sizeof(K) * 8) +{ + const unsigned int NUM_UCHARS = (sizeof(K) + sizeof(unsigned char) - 1) / sizeof(unsigned char); + unsigned char key_bits[NUM_UCHARS]; + + do { + + for (int j = 0; j < NUM_UCHARS; j++) { + unsigned char quarterword = 0xff; + for (int i = 0; i <= entropy_reduction; i++) { + quarterword &= (rand() >> 7); + } + key_bits[j] = quarterword; + } + + if (lower_key_bits < sizeof(K) * 8) { + unsigned long long base = 0; + memcpy(&base, key_bits, sizeof(K)); + base &= (1 << lower_key_bits) - 1; + memcpy(key_bits, &base, sizeof(K)); + } + + memcpy(&key, key_bits, sizeof(K)); + + } while (key != key); // avoids NaNs when generating random floating point numbers +} + + +/****************************************************************************** + * Templated routines for printing keys/values to the console + ******************************************************************************/ + +template +void PrintValue(T val) { + printf("%d", val); +} + 
+template<> +void PrintValue(float val) { + printf("%f", val); +} + +template<> +void PrintValue(double val) { + printf("%f", val); +} + +template<> +void PrintValue(unsigned char val) { + printf("%u", val); +} + +template<> +void PrintValue(unsigned short val) { + printf("%u", val); +} + +template<> +void PrintValue(unsigned int val) { + printf("%u", val); +} + +template<> +void PrintValue(long val) { + printf("%ld", val); +} + +template<> +void PrintValue(unsigned long val) { + printf("%lu", val); +} + +template<> +void PrintValue(long long val) { + printf("%lld", val); +} + +template<> +void PrintValue(unsigned long long val) { + printf("%llu", val); +} + + + +/** + * Compares the equivalence of two arrays + */ +template +int CompareResults(T* computed, T* reference, SizeT len, bool verbose = true) +{ + printf("\n"); + for (SizeT i = 0; i < len; i++) { + + if (computed[i] != reference[i]) { + printf("INCORRECT: [%lu]: ", (unsigned long) i); + PrintValue(computed[i]); + printf(" != "); + PrintValue(reference[i]); + + if (verbose) { + printf("\nresult[..."); + for (size_t j = (i >= 5) ? i - 5 : 0; (j < i + 5) && (j < len); j++) { + PrintValue(computed[j]); + printf(", "); + } + printf("...]"); + printf("\nreference[..."); + for (size_t j = (i >= 5) ? i - 5 : 0; (j < i + 5) && (j < len); j++) { + PrintValue(reference[j]); + printf(", "); + } + printf("...]"); + } + + return 1; + } + } + + printf("CORRECT\n"); + return 0; +} + +/** + * Creates an example sorting problem whose keys is a vector of the specified + * number of K elements, values of V elements, and then dispatches the problem + * to the GPU for the given number of iterations, displaying runtime information. 
+ * + * @param[in] iterations + * Number of times to invoke the GPU sorting primitive + * @param[in] num_elements + * Size in elements of the vector to sort + * @param[in] cfg + * Config + */ +template +void TestSort( + unsigned int iterations, + int num_elements, + bool keys_only, const DeviceUtils::Config& cfg) +{ + // Allocate the sorting problem on the host and fill the keys with random bytes + + K *h_keys = NULL; + K *h_reference_keys = NULL; + V *h_values = NULL; + h_keys = (K*) malloc(num_elements * sizeof(K)); + h_reference_keys = (K*) malloc(num_elements * sizeof(K)); + if (!keys_only) h_values = (V*) malloc(num_elements * sizeof(V)); + + + // Use random bits + for (unsigned int i = 0; i < num_elements; ++i) { + RandomBits(h_keys[i], 0); + //h_keys[i] = 0xffffffffu-i; + if (!keys_only) + h_values[i] = h_keys[i];//0xffffffffu-i; + + h_reference_keys[i] = h_keys[i]; + } + + // Run the timing test + if (keys_only) { + TimedSort(num_elements, h_keys, iterations, cfg); + } else { + TimedSort(num_elements, h_keys, h_values, iterations, cfg); + } + +// cudaThreadSynchronize(); + + // Display sorted key data + if (g_verbose) { + printf("\n\nKeys:\n"); + for (int i = 0; i < num_elements; i++) { + PrintValue(h_keys[i]); + printf(", "); + } + printf("\n\n"); + } + + // Verify solution + std::sort(h_reference_keys, h_reference_keys + num_elements); + CompareResults(h_keys, h_reference_keys, num_elements, true); + printf("\n"); + fflush(stdout); + + // Free our allocated host memory + if (h_keys != NULL) free(h_keys); + if (h_values != NULL) free(h_values); +} + + + +/** + * Displays the commandline usage for this tool + */ +void Usage() +{ + printf("\ntest_large_problem_sorting [--device=] [--v] [--i=] [--n=] [--keys-only]\n"); + printf("\n"); + printf("\t--v\tDisplays sorted results to the console.\n"); + printf("\n"); + printf("\t--i\tPerforms the sorting operation times\n"); + printf("\t\t\ton the device. Re-copies original input each time. 
Default = 1\n"); + printf("\n"); + printf("\t--n\tThe number of elements to comprise the sample problem\n"); + printf("\t\t\tDefault = 512\n"); + printf("\n"); + printf("\t--keys-only\tSpecifies that keys are not accommodated by value pairings\n"); + printf("\n"); +} + + +/****************************************************************************** + * Command-line parsing + ******************************************************************************/ +#include +#include +#include + +class CommandLineArgs +{ +protected: + + std::map pairs; + +public: + + // Constructor + CommandLineArgs(int argc, char **argv) + { + using namespace std; + + for (int i = 1; i < argc; i++) + { + string arg = argv[i]; + + if ((arg[0] != '-') || (arg[1] != '-')) { + continue; + } + + string::size_type pos; + string key, val; + if ((pos = arg.find( '=')) == string::npos) { + key = string(arg, 2, arg.length() - 2); + val = ""; + } else { + key = string(arg, 2, pos - 2); + val = string(arg, pos + 1, arg.length() - 1); + } + pairs[key] = val; + } + } + + bool CheckCmdLineFlag(const char* arg_name) + { + using namespace std; + map::iterator itr; + if ((itr = pairs.find(arg_name)) != pairs.end()) { + return true; + } + return false; + } + + template + void GetCmdLineArgument(const char *arg_name, T &val); + + int ParsedArgc() + { + return pairs.size(); + } +}; + +template +void CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val) +{ + using namespace std; + map::iterator itr; + if ((itr = pairs.find(arg_name)) != pairs.end()) { + istringstream strstream(itr->second); + strstream >> val; + } +} + +template <> +void CommandLineArgs::GetCmdLineArgument(const char* arg_name, char* &val) +{ + using namespace std; + map::iterator itr; + if ((itr = pairs.find(arg_name)) != pairs.end()) { + + string s = itr->second; + val = (char*) malloc(sizeof(char) * (s.length() + 1)); + strcpy(val, s.c_str()); + + } else { + val = NULL; + } +} + + + + + 
+/****************************************************************************** + * Main + ******************************************************************************/ + +int main( int argc, char** argv) +{ + + //srand(time(NULL)); + srand(0); // presently deterministic + + unsigned int num_elements = 1024*1024*12;//16*1024;//8*524288;//2048;//512;//524288; + unsigned int iterations = 10; + bool keys_only; + + // + // Check command line arguments + // + + CommandLineArgs args(argc,argv); + + if (args.CheckCmdLineFlag("help")) + { + Usage(); + return 0; + } + + args.GetCmdLineArgument("i", iterations); + args.GetCmdLineArgument("n", num_elements); + keys_only = args.CheckCmdLineFlag("keys-only"); + g_verbose = args.CheckCmdLineFlag("v"); + + DeviceUtils::Config cfg; + + // Choose AMD or NVidia +#ifdef CL_PLATFORM_AMD + cfg.m_vendor = DeviceUtils::Config::VD_AMD; +#endif + +#ifdef CL_PLATFORM_NVIDIA + cfg.m_vendor = DeviceUtils::Config::VD_NV; +#endif + + TestSort( + iterations, + num_elements, + keys_only, cfg); + +#ifdef ADL_ENABLE_DX11 + TestSort( + iterations, + num_elements, + keys_only, cfg); +#endif //ADL_ENABLE_DX11 +} + + + diff --git a/Extras/RigidBodyGpuPipeline/opencl/vector_add/AMD/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/vector_add/AMD/premake4.lua new file mode 100644 index 000000000..4b5e39b9b --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/vector_add/AMD/premake4.lua @@ -0,0 +1,21 @@ + + hasCL = findOpenCL_AMD() + + if (hasCL) then + + project "OpenCL_VectorAdd_AMD" + + initOpenCL_AMD() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + + files { + "../main.cpp", + "../../basic_initialize/btOpenCLUtils.cpp", + "../../basic_initialize/btOpenCLUtils.h" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/vector_add/Intel/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/vector_add/Intel/premake4.lua new file mode 100644 index 000000000..0f1be986b --- /dev/null +++ 
b/Extras/RigidBodyGpuPipeline/opencl/vector_add/Intel/premake4.lua @@ -0,0 +1,23 @@ + + hasCL = findOpenCL_Intel() + + if (hasCL) then + + project "OpenCL_intialize_Intel" + + initOpenCL_Intel() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + +-- includedirs {"..","../../../../include/gpu_research"} + + files { + "../main.cpp", + "../btOpenCLUtils.cpp", + "../btOpenCLUtils.h" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/vector_add/NVIDIA/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/vector_add/NVIDIA/premake4.lua new file mode 100644 index 000000000..e7c1d156f --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/vector_add/NVIDIA/premake4.lua @@ -0,0 +1,23 @@ + + hasCL = findOpenCL_NVIDIA() + + if (hasCL) then + + project "OpenCL_intialize_NVIDIA" + + initOpenCL_NVIDIA() + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + +-- includedirs {"..","../../../../include/gpu_research"} + + files { + "../main.cpp", + "../btOpenCLUtils.cpp", + "../btOpenCLUtils.h" + } + + end \ No newline at end of file diff --git a/Extras/RigidBodyGpuPipeline/opencl/vector_add/VectorAddKernels.cl b/Extras/RigidBodyGpuPipeline/opencl/vector_add/VectorAddKernels.cl new file mode 100644 index 000000000..2ff17826a --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/vector_add/VectorAddKernels.cl @@ -0,0 +1,16 @@ + + +__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c, int numElements) +{ + // get oct-float index into global data array + int iGID = get_global_id(0); + if (iGID>=numElements) + return; + + float8 aGID = a[iGID]; + float8 bGID = b[iGID]; + + float8 result = aGID + bGID; + // write back out to GMEM + c[iGID] = result; +} diff --git a/Extras/RigidBodyGpuPipeline/opencl/vector_add/VectorAddKernels.h b/Extras/RigidBodyGpuPipeline/opencl/vector_add/VectorAddKernels.h new file mode 100644 index 000000000..fb7e2fc21 --- /dev/null +++ 
b/Extras/RigidBodyGpuPipeline/opencl/vector_add/VectorAddKernels.h @@ -0,0 +1,18 @@ +static const char* vectorAddCL= \ +"\n" +"\n" +"__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c, int numElements)\n" +"{\n" +" // get oct-float index into global data array\n" +" int iGID = get_global_id(0);\n" +" if (iGID>=numElements)\n" +" return;\n" +"\n" +" float8 aGID = a[iGID];\n" +" float8 bGID = b[iGID];\n" +"\n" +" float8 result = aGID + bGID;\n" +" // write back out to GMEM\n" +" c[iGID] = result;\n" +"}\n" +; diff --git a/Extras/RigidBodyGpuPipeline/opencl/vector_add/main.cpp b/Extras/RigidBodyGpuPipeline/opencl/vector_add/main.cpp new file mode 100644 index 000000000..54afa4ee8 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/vector_add/main.cpp @@ -0,0 +1,367 @@ + +///VectorAdd sample, from the NVidia JumpStart Guide +///http://developer.download.nvidia.com/OpenCL/NVIDIA_OpenCL_JumpStart_Guide.pdf + +///Instead of #include we include +///Apart from this include file, all other code should compile and work on OpenCL compliant implementation + + +//#define LOAD_FROM_FILE + +#ifdef __APPLE__ + #include +#else + #include +#endif //__APPLE__ + +#include +#include +#include +#include + +#define GRID3DOCL_CHECKERROR(a, b) if((a)!=(b)) { printf("3D GRID OCL Error : %d\n", (a)); btAssert((a) == (b)); } +size_t wgSize; + +#include "VectorAddKernels.h" + + + +char* loadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength) +{ + // locals + FILE* pFileStream = NULL; + size_t szSourceLength; + + // open the OpenCL source code file + pFileStream = fopen(cFilename, "rb"); + if(pFileStream == 0) + { + return NULL; + } + + size_t szPreambleLength = strlen(cPreamble); + + // get the length of the source code + fseek(pFileStream, 0, SEEK_END); + szSourceLength = ftell(pFileStream); + fseek(pFileStream, 0, SEEK_SET); + + // allocate a buffer for the source code string and read it in + char* cSourceString = 
(char *)malloc(szSourceLength + szPreambleLength + 1); + memcpy(cSourceString, cPreamble, szPreambleLength); + fread((cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream); + + // close the file and return the total length of the combined (preamble + source) string + fclose(pFileStream); + if(szFinalLength != 0) + { + *szFinalLength = szSourceLength + szPreambleLength; + } + cSourceString[szSourceLength + szPreambleLength] = '\0'; + + return cSourceString; +} + +size_t workitem_size[3]; + +void printDevInfo(cl_device_id device) +{ + char device_string[1024]; + + clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); + printf( " Device %s:\n", device_string); + + // CL_DEVICE_INFO + cl_device_type type; + clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL); + if( type & CL_DEVICE_TYPE_CPU ) + printf(" CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_CPU"); + if( type & CL_DEVICE_TYPE_GPU ) + printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_GPU"); + if( type & CL_DEVICE_TYPE_ACCELERATOR ) + printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); + if( type & CL_DEVICE_TYPE_DEFAULT ) + printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); + + // CL_DEVICE_MAX_COMPUTE_UNITS + cl_uint compute_units; + clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); + printf( " CL_DEVICE_MAX_COMPUTE_UNITS:\t%d\n", compute_units); + + // CL_DEVICE_MAX_WORK_GROUP_SIZE + + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL); + printf( " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%zu / %zu / %zu \n", workitem_size[0], workitem_size[1], workitem_size[2]); + +} + + + + +// Main function +// ********************************************************************* +int main(int argc, char **argv) +{ + void *srcA, *srcB, *dst; // Host buffers for OpenCL test + cl_context cxGPUContext; // OpenCL context + cl_command_queue cqCommandQue; // 
OpenCL command que + cl_device_id* cdDevices; // OpenCL device list + cl_program cpProgram; // OpenCL program + cl_kernel ckKernel; // OpenCL kernel + cl_mem cmMemObjs[3]; // OpenCL memory buffer objects: 3 for device + size_t szGlobalWorkSize[1]; // 1D var for Total # of work items + size_t szLocalWorkSize[1]; // 1D var for # of work items in the work group + size_t szParmDataBytes; // Byte size of context information + cl_int ciErr1, ciErr2; // Error code var + + + int iTestN = 100000 * 8; // Size of Vectors to process + + int actualGlobalSize = iTestN / 8; + + + // set Global and Local work size dimensions + szGlobalWorkSize[0] = iTestN >> 3; // do 8 computations per work item + szLocalWorkSize[0]= iTestN>>3; + + + // Allocate and initialize host arrays + srcA = (void *)malloc (sizeof(cl_float) * iTestN); + srcB = (void *)malloc (sizeof(cl_float) * iTestN); + dst = (void *)malloc (sizeof(cl_float) * iTestN); + + int i; + + // Initialize arrays with some values + for (i=0;i processing outside of the buffer + //make sure to check kernel + } + + size_t globalThreads[] = {num_t * workgroupSize}; + size_t localThreads[] = {workgroupSize}; + + + localWorkSize[0] = workgroupSize; + globalWorkSize[0] = num_t * workgroupSize; + localWorkSize[1] = 1; + globalWorkSize[1] = 1; + + // Copy input data from host to GPU and launch kernel + ciErr1 |= clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, globalThreads, localThreads, 0, NULL, NULL); + + } + + if (ciErrNum != CL_SUCCESS) + { + printf("cannot clEnqueueNDRangeKernel\n"); + exit(0); + } + + clFinish(cqCommandQue); + // Read back results and check accumulated errors + ciErr1 |= clEnqueueReadBuffer(cqCommandQue, cmMemObjs[2], CL_TRUE, 0, sizeof(cl_float8) * szGlobalWorkSize[0], dst, 0, NULL, NULL); + + // Release kernel, program, and memory objects + // NOTE: Most properly this should be done at any of the exit points above, but it is omitted elsewhere for clarity. 
+ free(cdDevices); + clReleaseKernel(ckKernel); + clReleaseProgram(cpProgram); + clReleaseCommandQueue(cqCommandQue); + clReleaseContext(cxGPUContext); + + + // print the results + int iErrorCount = 0; + for (i = 0; i < iTestN; i++) + { + if (((float*)dst)[i] != ((float*)srcA)[i]+((float*)srcB)[i]) + iErrorCount++; + } + + if (iErrorCount) + { + printf("MiniCL validation FAILED\n"); + } else + { + printf("MiniCL validation SUCCESSFULL\n"); + } + // Free host memory, close log and return success + for (i = 0; i < 3; i++) + { + clReleaseMemObject(cmMemObjs[i]); + } + + free(srcA); + free(srcB); + free (dst); + printf("Press ENTER to quit\n"); + getchar(); +} + + diff --git a/Extras/RigidBodyGpuPipeline/opencl/vector_add/premake4.lua b/Extras/RigidBodyGpuPipeline/opencl/vector_add/premake4.lua new file mode 100644 index 000000000..56a16eed5 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/vector_add/premake4.lua @@ -0,0 +1,4 @@ + + include "AMD" +-- include "Intel" +-- include "NVIDIA" diff --git a/Extras/RigidBodyGpuPipeline/opencl/vector_add/stringify.py b/Extras/RigidBodyGpuPipeline/opencl/vector_add/stringify.py new file mode 100644 index 000000000..e79e281e4 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/vector_add/stringify.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +import sys +import os +import shutil + +arg = sys.argv[1] +fh = open(arg) + +print 'static const char* '+sys.argv[2]+'= \\' +for line in fh.readlines(): + a = line.strip('\n') + print '"'+a+'\\n"' +print ';' diff --git a/Extras/RigidBodyGpuPipeline/opencl/vector_add/stringifyVectorAddKernel.bat b/Extras/RigidBodyGpuPipeline/opencl/vector_add/stringifyVectorAddKernel.bat new file mode 100644 index 000000000..9b3913db1 --- /dev/null +++ b/Extras/RigidBodyGpuPipeline/opencl/vector_add/stringifyVectorAddKernel.bat @@ -0,0 +1,8 @@ +stringify.py VectorAddKernels.cl vectorAddCL >VectorAddKernels.h + + + + +@echo Warning: +@echo You might still need to find/replace for \\n (due to macros) and 
replace #include statements by their content +pause