diff --git a/examples/ExampleBrowser/ExampleEntries.cpp b/examples/ExampleBrowser/ExampleEntries.cpp index 3cc36badb..468d002e0 100644 --- a/examples/ExampleBrowser/ExampleEntries.cpp +++ b/examples/ExampleBrowser/ExampleEntries.cpp @@ -28,6 +28,7 @@ #include "../Experiments/ImplicitCloth/ImplicitClothExample.h" #include "../Importers/ImportBullet/SerializeSetup.h" #include "../Raycast/RaytestDemo.h" +#include "../OpenCL/broadphase/PairBench.h" struct ExampleEntry @@ -163,7 +164,10 @@ static ExampleEntry gDefaultExamples[]= ExampleEntry(1,"Implicit Cloth", "Cloth simulation using implicit integration, by Stan Melax. The cloth is only attached at the corners. Note the stability using a large time step even with high stiffness.", ImplicitClothCreateFunc), - +#ifdef B3_USE_CLEW + ExampleEntry(0,"OpenCL (experimental)"), + ExampleEntry(1,"Pair Bench", "Benchmark of overlapping pair search using OpenCL.", PairBenchOpenCLCreateFunc), +#endif // ExampleEntry(0,"Rendering"), ExampleEntry(1,"Instanced Rendering", "Simple example of fast instanced rendering, only active when using OpenGL3+.",RenderInstancingCreateFunc), ExampleEntry(1,"CoordinateSystemDemo","Show the axis and positive rotation direction around the axis.", CoordinateSystemCreateFunc), diff --git a/examples/ExampleBrowser/premake4.lua b/examples/ExampleBrowser/premake4.lua index 9d3192813..0ef8c69e8 100644 --- a/examples/ExampleBrowser/premake4.lua +++ b/examples/ExampleBrowser/premake4.lua @@ -1,6 +1,16 @@ project "App_ExampleBrowser" + hasCL = findOpenCL("clew") + + if (hasCL) then + + -- project ("App_Bullet3_OpenCL_Demos_" .. vendor) + + initOpenCL("clew") + + end + language "C++" kind "ConsoleApp" @@ -16,8 +26,20 @@ initOpenGL() initGlew() + if (hasCL) then + links { + "Bullet3OpenCL_clew", + "Bullet3Dynamics", + "Bullet3Collision", + "Bullet3Geometry", + "Bullet3Common", + } + end + defines {"INCLUDE_CLOTH_DEMOS"} + + files { "**.cpp", "**.h", @@ -66,6 +88,13 @@ "../ThirdPartyLibs/urdf/boost_replacement/string_split.h", } + + if (hasCL) then + files { + "../OpenCL/broadphase/*", + "../OpenCL/CommonOpenCL/*" + } + end if os.is("Linux") then initX11() @@ -74,3 +103,5 @@ end if os.is("MacOSX") then links{"Cocoa.framework"} end + + \ No newline at end of file diff --git a/examples/Experiments/ImplicitCloth/ImplicitClothExample.cpp b/examples/Experiments/ImplicitCloth/ImplicitClothExample.cpp index 7a3df9dd6..7e2d409de 100644 --- a/examples/Experiments/ImplicitCloth/ImplicitClothExample.cpp +++ b/examples/Experiments/ImplicitCloth/ImplicitClothExample.cpp @@ -26,7 +26,7 @@ struct ImplicitClothExample : public CommonExampleInterface struct GUIHelperInterface* m_guiHelper; int m_option; - Cloth* m_cloth = 0; + Cloth* m_cloth; @@ -34,7 +34,8 @@ struct ImplicitClothExample : public CommonExampleInterface public: ImplicitClothExample(struct GUIHelperInterface* helper, int option) :m_guiHelper(helper), - m_option(option) + m_option(option), + m_cloth(0) { } virtual void initPhysics(); diff --git a/examples/OpenCL/CommonOpenCL/CommonOpenCLBase.h b/examples/OpenCL/CommonOpenCL/CommonOpenCLBase.h new file mode 100644 index 000000000..c8f50e93f --- /dev/null +++ b/examples/OpenCL/CommonOpenCL/CommonOpenCLBase.h @@ -0,0 +1,171 @@ +#ifndef COMMON_MULTI_BODY_SETUP_H +#define COMMON_MULTI_BODY_SETUP_H + + +#include "../CommonInterfaces/CommonExampleInterface.h" +#include "../CommonInterfaces/CommonGUIHelperInterface.h" +#include "../CommonInterfaces/CommonRenderInterface.h" +#include "../CommonInterfaces/CommonGraphicsAppInterface.h" +#include "../CommonInterfaces/CommonWindowInterface.h" +#include "../CommonInterfaces/CommonCameraInterface.h" + +#include "GpuDemoInternalData.h" +#include "Bullet3Common/b3Scalar.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" + +struct CommonOpenCLBase : public CommonExampleInterface +{ + + struct GUIHelperInterface* m_guiHelper; + struct GpuDemoInternalData* m_clData; + + + CommonOpenCLBase (GUIHelperInterface* helper) + :m_guiHelper(helper), + m_clData(0) + { + m_clData = new GpuDemoInternalData(); + } + + virtual ~CommonOpenCLBase() + { + delete m_clData; + m_clData = 0; + } + + virtual void stepSimulation(float deltaTime) + { + } + + + + + + virtual void initCL(int preferredDeviceIndex, int preferredPlatformIndex) + { + void* glCtx=0; + void* glDC = 0; + + + + int ciErrNum = 0; + + cl_device_type deviceType = CL_DEVICE_TYPE_GPU; + //if (gAllowCpuOpenCL) + // deviceType = CL_DEVICE_TYPE_ALL; + + + + // if (useInterop) + // { + // m_data->m_clContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); + // } else + { + m_clData->m_clContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex,&m_clData->m_platformId); + } + + + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + int numDev = b3OpenCLUtils::getNumDevices(m_clData->m_clContext); + + if (numDev>0) + { + m_clData->m_clDevice= b3OpenCLUtils::getDevice(m_clData->m_clContext,0); + m_clData->m_clQueue = clCreateCommandQueue(m_clData->m_clContext, m_clData->m_clDevice, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + + b3OpenCLDeviceInfo info; + b3OpenCLUtils::getDeviceInfo(m_clData->m_clDevice,&info); + m_clData->m_clDeviceName = info.m_deviceName; + m_clData->m_clInitialized = true; + + } + } + + virtual void exitCL() + { + + if (m_clData && m_clData->m_clInitialized) + { + clReleaseCommandQueue(m_clData->m_clQueue); + clReleaseContext(m_clData->m_clContext); + m_clData->m_clInitialized = false; + } + } + + + + virtual void renderScene() + { + if (m_guiHelper->getRenderInterface()) + { + m_guiHelper->getRenderInterface()->renderScene(); + } + } + + virtual void physicsDebugDraw(int debugDrawFlags) + { + + + } + + virtual bool keyboardCallback(int key, int state) + { + return false;//don't handle this key + } + + + + virtual bool mouseMoveCallback(float x,float y) + { + return false; + } + + virtual bool mouseButtonCallback(int button, int state, float x, float y) + { + CommonRenderInterface* renderer = m_guiHelper->getRenderInterface(); + + if (!renderer) + { + b3Assert(0); + return false; + } + + CommonWindowInterface* window = m_guiHelper->getAppInterface()->m_window; + + + if (state==1) + { + if(button==0 && (!window->isModifierKeyPressed(B3G_ALT) && !window->isModifierKeyPressed(B3G_CONTROL) )) + { + /*btVector3 camPos; + renderer->getActiveCamera()->getCameraPosition(camPos); + + btVector3 rayFrom = camPos; + btVector3 rayTo = getRayTo(int(x),int(y)); + + pickBody(rayFrom, rayTo); + */ + + + } + } else + { + if (button==0) + { +// removePickingConstraint(); + //remove p2p + } + } + + //printf("button=%d, state=%d\n",button,state); + return false; + } + + + }; + +#endif //COMMON_MULTI_BODY_SETUP_H + diff --git a/examples/OpenCL/CommonOpenCL/GpuDemoInternalData.h b/examples/OpenCL/CommonOpenCL/GpuDemoInternalData.h new file mode 100644 index 000000000..be6584c35 --- /dev/null +++ b/examples/OpenCL/CommonOpenCL/GpuDemoInternalData.h @@ -0,0 +1,27 @@ +#ifndef GPU_DEMO_INTERNAL_DATA_H +#define GPU_DEMO_INTERNAL_DATA_H + +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" + +struct GpuDemoInternalData +{ + cl_platform_id m_platformId; + cl_context m_clContext; + cl_device_id m_clDevice; + cl_command_queue m_clQueue; + bool m_clInitialized; + char* m_clDeviceName; + + GpuDemoInternalData() + :m_platformId(0), + m_clContext(0), + m_clDevice(0), + m_clQueue(0), + m_clInitialized(false), + m_clDeviceName(0) + { + + } +}; + +#endif diff --git a/examples/OpenCL/broadphase/PairBench.cpp b/examples/OpenCL/broadphase/PairBench.cpp new file mode 100644 index 000000000..b05bda95c --- /dev/null +++ b/examples/OpenCL/broadphase/PairBench.cpp @@ -0,0 +1,773 @@ +//those header files need to be at the top, because of conflict __global and STL + +#include "PairBench.h" +#include "Bullet3Common/b3Quaternion.h" + +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h" +#include "../Utils/b3Clock.h" + +//#include "../GpuDemoInternalData.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" + +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" + +#include "../OpenGLWindow/OpenGLInclude.h" +#include "../OpenGLWindow/ShapeData.h" + +#include + +#include "pairsKernel.h" + + +#include "../CommonInterfaces/CommonExampleInterface.h" +#include "../CommonInterfaces/CommonGUIHelperInterface.h" +#include "../CommonInterfaces/CommonRenderInterface.h" +#include "../CommonInterfaces/CommonCameraInterface.h" +#include "../CommonInterfaces/CommonGraphicsAppInterface.h" +#include "../CommonInterfaces/CommonWindowInterface.h" +#include "../CommonOpenCL/CommonOpenCLBase.h" +#include "../OpenGLWindow/GLInstancingRenderer.h" +#include "../OpenGLWindow/GLInstanceRendererInternalData.h" + + + +class PairBench : public CommonOpenCLBase +{ + + struct PairBenchInternalData* m_data; + + GLInstancingRenderer* m_instancingRenderer; + +public: + + PairBench(GUIHelperInterface* helper); + virtual ~PairBench(); + + virtual void initPhysics(); + virtual void exitPhysics(); + + + + void createBroadphase(int xdim, int ydim, int zdim); + void deleteBroadphase(); + + virtual void stepSimulation(float deltaTime); + + virtual void renderScene(); + +}; + + + +//we use an offset, just for testing to make sure there is no assumption in the broadphase that 'index' starts at 0 +#define TEST_INDEX_OFFSET 1024 + + + + +char* gPairBenchFileName = 0; +extern bool useShadowMap; +float maxExtents = -1e30f; +int largeCount = 0; + +float timeStepPos = 0.000166666; +float mAmplitude = 251.f; +int dimensions[3]={10,10,10};//initialized with x_dim/y_dim/z_dim +const char* axisNames[3] = {"# x-axis","# y-axis","# z-axis"}; +extern bool gReset; + +static int curUseLargeAabbOption=0; +const char* useLargeAabbOptions[] = +{ + "NoLargeAabb", + "UseLargeAabb", +}; + +struct BroadphaseEntry +{ + const char* m_name; + b3GpuBroadphaseInterface::CreateFunc* m_createFunc; +}; + + +static PairBench* sPairDemo = 0; + +#define BP_COMBO_INDEX 123 + +static int curSelectedBroadphase = 0; +static BroadphaseEntry allBroadphases[]= +{ + {"Gpu Grid",b3GpuGridBroadphase::CreateFunc}, + {"Parallel Linear BVH",b3GpuParallelLinearBvhBroadphase::CreateFunc}, + {"CPU Brute Force",b3GpuSapBroadphase::CreateFuncBruteForceCpu}, + {"GPU Brute Force",b3GpuSapBroadphase::CreateFuncBruteForceGpu}, + {"GPU 1-SAP Original",b3GpuSapBroadphase::CreateFuncOriginal}, + {"GPU 1-SAP Barrier",b3GpuSapBroadphase::CreateFuncBarrier}, + {"GPU 1-SAP LDS",b3GpuSapBroadphase::CreateFuncLocalMemory} +}; + + +struct PairBenchInternalData +{ + b3GpuBroadphaseInterface* m_broadphaseGPU; + b3GpuBroadphaseInterface* m_validationBroadphase; + + cl_kernel m_moveObjectsKernel; + cl_kernel m_sineWaveKernel; + cl_kernel m_colorPairsKernel; + cl_kernel m_updateAabbSimple; + + + b3OpenCLArray* m_instancePosOrnColor; + b3OpenCLArray* m_bodyTimes; + PairBenchInternalData() + :m_broadphaseGPU(0), + m_moveObjectsKernel(0), + m_sineWaveKernel(0), + m_colorPairsKernel(0), + m_instancePosOrnColor(0), + m_bodyTimes(0), + m_updateAabbSimple(0) + { + } + + int m_oldYposition; + + +}; + + +PairBench::PairBench(GUIHelperInterface* helper) +:CommonOpenCLBase(helper) +{ + m_data = new PairBenchInternalData; + m_instancingRenderer = (GLInstancingRenderer*) helper->getRenderInterface(); + m_data->m_validationBroadphase = 0; +} +PairBench::~PairBench() +{ + delete m_data; +} + + + + + + +static inline float parseFloat(const char*& token) +{ + token += strspn(token, " \t"); + float f = (float)atof(token); + token += strcspn(token, " \t\r"); + return f; +} + +enum PairToggleButtons +{ + MY_RESET = 1024, +}; + + +#define PAIRS_CL_PROGRAM_PATH "Demos3/GpuDemos/broadphase/pairsKernel.cl" + + + + + +void PairBench::initPhysics() +{ + dimensions[0] = 10; + dimensions[1] = 10; + dimensions[2] = 10; + + //m_instancingRenderer = ci.m_instancingRenderer; + sPairDemo = this; + useShadowMap = false; + + + int startItem = 0; + + int preferredOpenCLDeviceIndex=-1; + int preferredOpenCLPlatformIndex=-1; + + initCL(preferredOpenCLDeviceIndex,preferredOpenCLPlatformIndex); + + if (m_clData->m_clContext) + { + cl_int err; + cl_program pairBenchProg=b3OpenCLUtils::compileCLProgramFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,&err,"",PAIRS_CL_PROGRAM_PATH); + int errNum=0; + m_data->m_moveObjectsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"moveObjectsKernel",&errNum,pairBenchProg); + m_data->m_sineWaveKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"sineWaveKernel",&errNum,pairBenchProg); + m_data->m_colorPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"colorPairsKernel2",&errNum,pairBenchProg); + m_data->m_updateAabbSimple = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"updateAabbSimple",&errNum,pairBenchProg); + + //Method for validating the overlapping pairs requires that the + //reference broadphase does not maintain internal state aside from AABB data. + //That is, overwriting the AABB state in the broadphase using + // b3GpuBroadphaseInterface::getAllAabbsGPU(), + // b3GpuBroadphaseInterface::getSmallAabbIndicesGPU(), and + // b3GpuBroadphaseInterface::getLargeAabbIndicesGPU() + //and then calling b3GpuBroadphaseInterface::calculateOverlappingPairs() should + //always produce the same result regardless of the current state of the broadphase. + m_data->m_validationBroadphase = b3GpuParallelLinearBvhBroadphase::CreateFunc(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue); + } + + createBroadphase(dimensions[0],dimensions[1],dimensions[2]); + +} + +void PairBench::createBroadphase(int arraySizeX, int arraySizeY, int arraySizeZ) +{ + + + m_data->m_broadphaseGPU = (allBroadphases[curSelectedBroadphase].m_createFunc)(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue); + + int strideInBytes = 9*sizeof(float); + int numVertices = sizeof(cube_vertices)/strideInBytes; + int numIndices = sizeof(cube_vertices)/sizeof(int); + int shapeId = m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); + int group=1; + int mask=1; + int index=TEST_INDEX_OFFSET; + + + if (gPairBenchFileName) + { + + + //char* fileName = "32006GPUAABBs.txt"; + char relativeFileName[1024]; + const char* prefix[]={"./data/","../data/","../../data/","../../../data/","../../../../data/"}; + int prefixIndex=-1; + { + + int numPrefixes = sizeof(prefix)/sizeof(char*); + + for (int i=0;i500) + { + b3Vector4 color=b3MakeVector4(0,1,0,0.1); + int id = m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); + m_data->m_broadphaseGPU->createLargeProxy(aabbMin,aabbMax,index,group,mask); + } else + { + b3Vector4 color=b3MakeVector4(1,0,0,1); + int id = m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); + m_data->m_broadphaseGPU->createProxy(aabbMin,aabbMax,index,group,mask); + index++; + } + + + + + patcnt++; + } + } + prefixIndex = i; + break; + } + + } + + if (prefixIndex<0) + { + b3Printf("Cannot find %s\n",gPairBenchFileName); + } + + } + + + } + else + { + for (int i=0;iregisterGraphicsInstance(shapeId,position,orn,color,scaling); + + + b3Vector3 aabbMin = position-scaling; + b3Vector3 aabbMax = position+scaling; + + if (large) + { + m_data->m_broadphaseGPU->createLargeProxy(aabbMin,aabbMax,index,group,mask); + + } else + { + m_data->m_broadphaseGPU->createProxy(aabbMin,aabbMax,index,group,mask); + } + index++; + } + } + } + } + + float camPos[4]={15.5,12.5,15.5,0}; + m_instancingRenderer->getActiveCamera()->setCameraTargetPosition(camPos[0],camPos[1],camPos[2]); + if (gPairBenchFileName) + { + m_instancingRenderer->getActiveCamera()->setCameraDistance(830); + } else + { + m_instancingRenderer->getActiveCamera()->setCameraDistance(130); + } + + m_instancingRenderer->writeTransforms(); + m_data->m_broadphaseGPU->writeAabbsToGpu(); + +} + +void PairBench::deleteBroadphase() +{ + delete m_data->m_broadphaseGPU; + m_data->m_broadphaseGPU = 0; + delete m_data->m_instancePosOrnColor; + m_data->m_instancePosOrnColor = 0; + delete m_data->m_bodyTimes; + m_data->m_bodyTimes = 0; + + m_data->m_broadphaseGPU = 0; + m_instancingRenderer->removeAllInstances(); +} + +void PairBench::exitPhysics() +{ + if(m_data->m_validationBroadphase) + { + delete m_data->m_validationBroadphase; + m_data->m_validationBroadphase = 0; + } + + sPairDemo = 0; + + exitCL(); + +} + + +void PairBench::renderScene() +{ + m_instancingRenderer->renderScene(); +} + +struct OverlappingPairSortPredicate +{ + inline bool operator() (const b3Int4& a, const b3Int4& b) const + { + if(a.x != b.x) return (a.x < b.x); + if(a.y != b.y) return (a.y < b.y); + if(a.z != b.z) return (a.z < b.z); + return (a.w < b.w); + } +}; + +void PairBench::stepSimulation(float deltaTime) +{ + //color all objects blue + + bool animate=true; + int numObjects= 0; + { + B3_PROFILE("Num Objects"); + numObjects = m_instancingRenderer->getInternalData()->m_totalNumInstances; + } + b3Vector4* positions = 0; + if (numObjects) + { + B3_PROFILE("Sync"); + GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo; + + + + int arraySizeInBytes = numObjects * (3)*sizeof(b3Vector4); + + glBindBuffer(GL_ARRAY_BUFFER, vbo); + cl_bool blocking= CL_TRUE; + char* hostPtr= 0; + { + B3_PROFILE("glMapBufferRange"); + hostPtr = (char*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_WRITE_BIT|GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY + } + GLint err = glGetError(); + assert(err==GL_NO_ERROR); + positions = (b3Vector4*)hostPtr; + + if (m_data->m_instancePosOrnColor && m_data->m_instancePosOrnColor->size() != 3*numObjects) + { + delete m_data->m_instancePosOrnColor; + m_data->m_instancePosOrnColor=0; + } + if (!m_data->m_instancePosOrnColor) + { + m_data->m_instancePosOrnColor = new b3OpenCLArray(m_clData->m_clContext,m_clData->m_clQueue); + m_data->m_instancePosOrnColor->resize(3*numObjects); + m_data->m_instancePosOrnColor->copyFromHostPointer(positions,3*numObjects,0); + m_data->m_bodyTimes = new b3OpenCLArray(m_clData->m_clContext,m_clData->m_clQueue); + m_data->m_bodyTimes ->resize(numObjects); + b3AlignedObjectArray tmp; + tmp.resize(numObjects); + for (int i=0;im_bodyTimes->copyFromHost(tmp); + } + + if (!gPairBenchFileName) + { + if (1) + { + if (1) + { + + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_sineWaveKernel,"m_sineWaveKernel"); + launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); + launcher.setBuffer(m_data->m_bodyTimes->getBufferCL() ); + + launcher.setConst(timeStepPos); + launcher.setConst(mAmplitude); + launcher.setConst( numObjects); + launcher.launch1D( numObjects); + clFinish(m_clData->m_clQueue); + } + else + { + + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_moveObjectsKernel,"m_moveObjectsKernel"); + launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); + launcher.setConst( numObjects); + launcher.launch1D( numObjects); + clFinish(m_clData->m_clQueue); + } + } + } + } + + bool updateOnGpu=true; + + if (1) + { + if (updateOnGpu) + { + B3_PROFILE("updateOnGpu"); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple,"m_updateAabbSimple"); + launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); + launcher.setConst( numObjects); + launcher.setBuffer(m_data->m_broadphaseGPU->getAabbBufferWS()); + launcher.launch1D( numObjects); + clFinish(m_clData->m_clQueue); + + } else + { + B3_PROFILE("updateOnCpu"); + if (!gPairBenchFileName) + { + int allAabbs = m_data->m_broadphaseGPU->getAllAabbsCPU().size(); + + + b3AlignedObjectArray posOrnColorsCpu; + if (m_data->m_instancePosOrnColor) + m_data->m_instancePosOrnColor->copyToHost(posOrnColorsCpu); + + + + for (int nodeId=0;nodeIdm_broadphaseGPU->getAllAabbsCPU()[nodeId]; + b3Vector3 halfExtents = 0.5f*(orgAabb.m_maxVec-orgAabb.m_minVec); + int orgNodeIndex = orgAabb.m_minIndices[3]; + int orgBroadphaseIndex = orgAabb.m_signedMaxIndices[3]; + + m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_minVec = position-halfExtents; + m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_minIndices[3] = orgNodeIndex; + m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_maxVec = position+halfExtents; + m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_signedMaxIndices[3]= orgBroadphaseIndex; + } + } + m_data->m_broadphaseGPU->writeAabbsToGpu(); + } + + + + } + } + + int prealloc = 3*1024*1024; + int maxOverlap = b3Min(prealloc,16*numObjects); + + unsigned long dt = 0; + if (numObjects) + { + b3Clock cl; + dt = cl.getTimeMicroseconds(); + B3_PROFILE("calculateOverlappingPairs"); + int sz = sizeof(b3Int4)*64*numObjects; + + + m_data->m_broadphaseGPU->calculateOverlappingPairs(maxOverlap); + int numPairs = m_data->m_broadphaseGPU->getNumOverlap(); + //printf("numPairs = %d\n", numPairs); + dt = cl.getTimeMicroseconds()-dt; + + } + + const bool VALIDATE_BROADPHASE = false; //Check that overlapping pairs of 2 broadphases are the same + if(numObjects && VALIDATE_BROADPHASE) + { + B3_PROFILE("validate broadphases"); + + { + B3_PROFILE("calculateOverlappingPairs m_validationBroadphase"); + //m_data->m_validationBroadphase->getAllAabbsCPU() = m_data->m_broadphaseGPU->getAllAabbsCPU(); + + m_data->m_validationBroadphase->getAllAabbsGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getAllAabbsGPU() ); + m_data->m_validationBroadphase->getSmallAabbIndicesGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getSmallAabbIndicesGPU() ); + m_data->m_validationBroadphase->getLargeAabbIndicesGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getLargeAabbIndicesGPU() ); + + m_data->m_validationBroadphase->calculateOverlappingPairs(maxOverlap); + } + + static b3AlignedObjectArray overlappingPairs; + static b3AlignedObjectArray overlappingPairsReference; + m_data->m_broadphaseGPU->getOverlappingPairsGPU().copyToHost(overlappingPairs); + m_data->m_validationBroadphase->getOverlappingPairsGPU().copyToHost(overlappingPairsReference); + + //Reorder pairs so that (pair.x < pair.y) is always true + { + B3_PROFILE("reorder pairs"); + + for(int i = 0; i < overlappingPairs.size(); ++i) + { + b3Int4 pair = overlappingPairs[i]; + if(pair.x > pair.y) + { + b3Swap(pair.x, pair.y); + b3Swap(pair.z, pair.w); + overlappingPairs[i] = pair; + } + } + for(int i = 0; i < overlappingPairsReference.size(); ++i) + { + b3Int4 pair = overlappingPairsReference[i]; + if(pair.x > pair.y) + { + b3Swap(pair.x, pair.y); + b3Swap(pair.z, pair.w); + overlappingPairsReference[i] = pair; + } + } + } + + // + { + B3_PROFILE("Sort overlapping pairs from most to least significant bit"); + + overlappingPairs.quickSort( OverlappingPairSortPredicate() ); + overlappingPairsReference.quickSort( OverlappingPairSortPredicate() ); + } + + //Compare + { + B3_PROFILE("compare pairs"); + + int numPairs = overlappingPairs.size(); + int numPairsReference = overlappingPairsReference.size(); + + bool success = true; + + if(numPairs == numPairsReference) + { + for(int i = 0; i < numPairsReference; ++i) + { + const b3Int4& pairA = overlappingPairs[i]; + const b3Int4& pairB = overlappingPairsReference[i]; + if( pairA.x != pairB.x + || pairA.y != pairB.y + || pairA.z != pairB.z + || pairA.w != pairB.w ) + { + b3Error("Error: one or more overlappingPairs differs from reference.\n"); + success = false; + break; + } + } + } + else + { + b3Error("Error: numPairs %d != numPairsReference %d \n", numPairs, numPairsReference); + success = false; + } + + printf("Broadphase validation: %d \n", success); + } + } + + /* + if (m_data->m_gui) + { + B3_PROFILE("update Gui"); + int allAabbs = m_data->m_broadphaseGPU->getAllAabbsCPU().size(); + int numOverlap = m_data->m_broadphaseGPU->getNumOverlap(); + + float time = dt/1000.f; + //printf("time = %f\n", time); + + char msg[1024]; + sprintf(msg,"#objects = %d, #overlapping pairs = %d, time = %f ms", allAabbs,numOverlap,time ); + //printf("msg=%s\n",msg); + m_data->m_gui->setStatusBarMessage(msg,true); + } + */ + + if (numObjects) + { + B3_PROFILE("animate"); + GLint err = glGetError(); + assert(err==GL_NO_ERROR); + //color overlapping objects in red + + + if (m_data->m_broadphaseGPU->getNumOverlap()) + { + bool colorPairsOnHost = false; + if (colorPairsOnHost ) + { + + } else + { + int numPairs = m_data->m_broadphaseGPU->getNumOverlap(); + cl_mem pairBuf = m_data->m_broadphaseGPU->getOverlappingPairBuffer(); + + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_colorPairsKernel,"m_colorPairsKernel"); + launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); + launcher.setConst( numObjects); + launcher.setBuffer( pairBuf); + int indexOffset = TEST_INDEX_OFFSET; + launcher.setConst(indexOffset); + launcher.setConst( numPairs); + launcher.launch1D( numPairs); + clFinish(m_clData->m_clQueue); + } + } + + if (numObjects) + { + m_data->m_instancePosOrnColor->copyToHostPointer(positions,3*numObjects,0); + } + + glUnmapBuffer( GL_ARRAY_BUFFER); + err = glGetError(); + assert(err==GL_NO_ERROR); + } + +} + +class CommonExampleInterface* PairBenchOpenCLCreateFunc(struct PhysicsInterface* pint, struct GUIHelperInterface* helper, int option) +{ + return new PairBench(helper); +} diff --git a/examples/OpenCL/broadphase/PairBench.h b/examples/OpenCL/broadphase/PairBench.h new file mode 100644 index 000000000..def128055 --- /dev/null +++ b/examples/OpenCL/broadphase/PairBench.h @@ -0,0 +1,8 @@ +#ifndef PAIR_BENCH_H +#define PAIR_BENCH_H + +class CommonExampleInterface* PairBenchOpenCLCreateFunc(struct PhysicsInterface* pint, struct GUIHelperInterface* helper, int option); + + +#endif + diff --git a/examples/OpenCL/broadphase/pairsKernel.cl b/examples/OpenCL/broadphase/pairsKernel.cl new file mode 100644 index 000000000..b99a22e6b --- /dev/null +++ b/examples/OpenCL/broadphase/pairsKernel.cl @@ -0,0 +1,79 @@ +__kernel void moveObjectsKernel(__global float4* posOrnColors, int numObjects) +{ + int iGID = get_global_id(0); + if (iGID>=numObjects) + return; + __global float4* positions = &posOrnColors[0]; + if (iGID<0.5*numObjects) + { + positions[iGID].y +=0.01f; + } + __global float4* colors = &posOrnColors[numObjects*2]; + colors[iGID] = (float4)(0,0,1,1); +} + +__kernel void colorPairsKernel2(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int indexOffset, int numPairs) +{ + int iPairId = get_global_id(0); + if (iPairId>=numPairs) + return; + __global float4* colors = &posOrnColors[numObjects*2]; + + int iObjectA = pairs[iPairId].x-indexOffset; + int iObjectB = pairs[iPairId].y-indexOffset; + colors[iObjectA] = (float4)(1,0,0,1); + colors[iObjectB] = (float4)(1,0,0,1); +} + +__kernel void + sineWaveKernel( __global float4* posOrnColors, __global float* pBodyTimes,float timeStepPos, float mAmplitude,const int numNodes) +{ + int nodeID = get_global_id(0); + if( nodeID < numNodes ) + { + pBodyTimes[nodeID] += timeStepPos; + float4 position = posOrnColors[nodeID]; + position.x = native_cos(pBodyTimes[nodeID]*2.17f)*mAmplitude + native_sin(pBodyTimes[nodeID])*mAmplitude*0.5f; + position.y = native_cos(pBodyTimes[nodeID]*1.38f)*mAmplitude + native_sin(pBodyTimes[nodeID]*mAmplitude); + position.z = native_cos(pBodyTimes[nodeID]*2.17f)*mAmplitude + native_sin(pBodyTimes[nodeID]*0.777f)*mAmplitude; + + posOrnColors[nodeID] = position; + __global float4* colors = &posOrnColors[numNodes*2]; + colors[nodeID] = (float4)(0,0,1,1); + } +} + +typedef struct +{ + float fx; + float fy; + float fz; + int uw; +} b3AABBCL; + +__kernel void updateAabbSimple( __global float4* posOrnColors, const int numNodes, __global b3AABBCL* pAABB) +{ + int nodeId = get_global_id(0); + if( nodeId < numNodes ) + { + + b3AABBCL orgAabbMin = pAABB[nodeId*2]; + b3AABBCL orgAabbMax = pAABB[nodeId*2+1]; + int orgNodeId = orgAabbMin.uw; + int orgBroadphaseIndex = orgAabbMax.uw; + + float4 position = posOrnColors[nodeId]; + float4 argAabbMinVec = (float4)(orgAabbMin.fx,orgAabbMin.fy,orgAabbMin.fz,0.f); + float4 argAabbMaxVec = (float4)(orgAabbMax.fx,orgAabbMax.fy,orgAabbMax.fz,0.f); + float4 halfExtents = 0.5f*(argAabbMaxVec-argAabbMinVec); + + pAABB[nodeId*2].fx = position.x-halfExtents.x; + pAABB[nodeId*2].fy = position.y-halfExtents.y; + pAABB[nodeId*2].fz = position.z-halfExtents.z; + pAABB[nodeId*2].uw = orgNodeId; + pAABB[nodeId*2+1].fx = position.x+halfExtents.x; + pAABB[nodeId*2+1].fy = position.y+halfExtents.y; + pAABB[nodeId*2+1].fz = position.z+halfExtents.z; + pAABB[nodeId*2+1].uw = orgBroadphaseIndex; + } +} diff --git a/examples/OpenCL/broadphase/pairsKernel.h b/examples/OpenCL/broadphase/pairsKernel.h new file mode 100644 index 000000000..2a98f2221 --- /dev/null +++ b/examples/OpenCL/broadphase/pairsKernel.h @@ -0,0 +1,77 @@ +//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project +static const char* pairsKernelsCL= \ +"__kernel void moveObjectsKernel(__global float4* posOrnColors, int numObjects)\n" +"{\n" +" int iGID = get_global_id(0);\n" +" if (iGID>=numObjects)\n" +" return;\n" +" __global float4* positions = &posOrnColors[0];\n" +" if (iGID<0.5*numObjects)\n" +" {\n" +" positions[iGID].y +=0.01f;\n" +" }\n" +" __global float4* colors = &posOrnColors[numObjects*2];\n" +" colors[iGID] = (float4)(0,0,1,1);\n" +"}\n" +"__kernel void colorPairsKernel2(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int indexOffset, int numPairs)\n" +"{\n" +" int iPairId = get_global_id(0);\n" +" if (iPairId>=numPairs)\n" +" return;\n" +" __global float4* colors = &posOrnColors[numObjects*2];\n" +" int iObjectA = pairs[iPairId].x-indexOffset;\n" +" int iObjectB = pairs[iPairId].y-indexOffset;\n" +" colors[iObjectA] = (float4)(1,0,0,1);\n" +" colors[iObjectB] = (float4)(1,0,0,1);\n" +"}\n" +"__kernel void \n" +" sineWaveKernel( __global float4* posOrnColors, __global float* pBodyTimes,float timeStepPos, float mAmplitude,const int numNodes)\n" +"{\n" +" int nodeID = get_global_id(0);\n" +" if( nodeID < numNodes )\n" +" {\n" +" pBodyTimes[nodeID] += timeStepPos;\n" +" float4 position = posOrnColors[nodeID];\n" +" position.x = native_cos(pBodyTimes[nodeID]*2.17f)*mAmplitude + native_sin(pBodyTimes[nodeID])*mAmplitude*0.5f;\n" +" position.y = native_cos(pBodyTimes[nodeID]*1.38f)*mAmplitude + native_sin(pBodyTimes[nodeID]*mAmplitude);\n" +" position.z = native_cos(pBodyTimes[nodeID]*2.17f)*mAmplitude + native_sin(pBodyTimes[nodeID]*0.777f)*mAmplitude;\n" +" \n" +" posOrnColors[nodeID] = position;\n" +" __global float4* colors = &posOrnColors[numNodes*2];\n" +" colors[nodeID] = (float4)(0,0,1,1);\n" +" }\n" +"}\n" +"typedef struct \n" +"{\n" +" float fx;\n" +" float fy;\n" +" float fz;\n" +" int uw;\n" +"} b3AABBCL;\n" +"__kernel void updateAabbSimple( __global float4* posOrnColors, const int numNodes, __global b3AABBCL* pAABB)\n" +"{\n" +" int nodeId = get_global_id(0);\n" +" if( nodeId < numNodes )\n" +" {\n" +" \n" +" b3AABBCL orgAabbMin = pAABB[nodeId*2];\n" +" b3AABBCL orgAabbMax = pAABB[nodeId*2+1];\n" +" int orgNodeId = orgAabbMin.uw;\n" +" int orgBroadphaseIndex = orgAabbMax.uw;\n" +" \n" +" float4 position = posOrnColors[nodeId];\n" +" float4 argAabbMinVec = (float4)(orgAabbMin.fx,orgAabbMin.fy,orgAabbMin.fz,0.f);\n" +" float4 argAabbMaxVec = (float4)(orgAabbMax.fx,orgAabbMax.fy,orgAabbMax.fz,0.f);\n" +" float4 halfExtents = 0.5f*(argAabbMaxVec-argAabbMinVec);\n" +" \n" +" pAABB[nodeId*2].fx = position.x-halfExtents.x;\n" +" pAABB[nodeId*2].fy = position.y-halfExtents.y;\n" +" pAABB[nodeId*2].fz = position.z-halfExtents.z;\n" +" pAABB[nodeId*2].uw = orgNodeId;\n" +" pAABB[nodeId*2+1].fx = position.x+halfExtents.x;\n" +" pAABB[nodeId*2+1].fy = position.y+halfExtents.y;\n" +" pAABB[nodeId*2+1].fz = position.z+halfExtents.z;\n" +" pAABB[nodeId*2+1].uw = orgBroadphaseIndex; \n" +" }\n" +"}\n" +;