Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
|
||||
-- Premake description of the OpenCL 3D-grid broadphase static library.
-- findOpenCL_AMD() and initOpenCL_AMD() are helpers defined outside this script.
-- hasCL is assigned as a global (not a local) variable.
hasCL = findOpenCL_AMD()

-- The project is only declared when findOpenCL_AMD() returned a truthy value.
if (hasCL) then

	project("OpenCL_bt3dGridBroadphase_AMD")

	initOpenCL_AMD()

	language("C++")
	kind("StaticLib")
	targetdir("../../../bin")

	libdirs({ "../../../rendering/GlutGlewWindows" })

	-- "../../../rendering/GlutGlewWindows" is intentionally not on the include path.
	includedirs({
		"../../../opencl/3dGridBroadphase/Shared",
		"../../../../../src",
		"../../primitives"
	})

	-- All sources and headers of the shared broadphase implementation.
	files({
		"../Shared/*.cpp",
		"../Shared/*.h"
	})

end
|
||||
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include <MiniCL/cl_MiniCL_Defs.h>
|
||||
|
||||
// Pull the kernel source in as ordinary code with C linkage.
// The .cl file wraps its whole content in MSTRINGIFY( ... ); defining the macro
// as the identity makes that wrapper expand to the raw kernel functions
// instead of a string literal.
extern "C"
{
	#define MSTRINGIFY(A) A
	#include "bt3dGridBroadphaseOCL.cl"
	#undef MSTRINGIFY
}
|
||||
@@ -0,0 +1,349 @@
|
||||
|
||||
MSTRINGIFY(
|
||||
|
||||
// Converts an integer cell coordinate into a linear cell index.
// The grid dimensions are taken from the second float4 of pParams, reinterpreted as int4.
// Each coordinate is wrapped by masking with (dimension - 1);
// NOTE(review): this is only a true modulo when the dimensions are powers of two - confirm on the host side.
int getPosHash(int4 gridPos, __global float4* pParams)
{
	int4 dims = *((__global int4*)(pParams + 1));
	int cellX = gridPos.x & (dims.x - 1);
	int cellY = gridPos.y & (dims.y - 1);
	int cellZ = gridPos.z & (dims.z - 1);
	// row-major layout: x varies fastest, then y, then z
	return (cellZ * dims.y + cellY) * dims.x + cellX;
}
|
||||
|
||||
// Computes the integer grid cell that contains worldPos.
// pParams[0].xyz holds the per-axis scale applied to the world position
// (the host fills it with the inverse cell size); pParams[1] reinterpreted
// as int4 holds the grid dimensions used to wrap the coordinate.
// Returns the wrapped cell coordinate in .xyz; .w is set to 0 so that the
// returned vector never carries an indeterminate component.
int4 getGridPos(float4 worldPos, __global float4* pParams)
{
	int4 gridPos;
	int4 gridDim = *((__global int4*)(pParams + 1));
	gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);
	gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);
	gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);
	gridPos.w = 0;
	return gridPos;
}
|
||||
|
||||
|
||||
// Computes, for each body, the hash of the grid cell that contains the centre of its AABB.
// pAABB stores two float4 per body (min at 2*i, max at 2*i + 1).
// pHash[i] receives (cell hash, body index).
__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams GUID_ARG)
{
	int index = get_global_id(0);
	// work-items past the end of the body list have nothing to do
	if(index < numObjects)
	{
		float4 lo = pAABB[index*2];
		float4 hi = pAABB[index*2 + 1];
		// centre of the bounding box
		float4 centre;
		centre.x = (lo.x + hi.x) * 0.5f;
		centre.y = (lo.y + hi.y) * 0.5f;
		centre.z = (lo.z + hi.z) * 0.5f;
		centre.w = 0.f;
		// locate the cell and linearise its coordinate
		int4 cell = getGridPos(centre, pParams);
		// pair the cell hash with the body index
		int2 keyValue;
		keyValue.x = getPosHash(cell, pParams);
		keyValue.y = index;
		pHash[index] = keyValue;
	}
}
|
||||
|
||||
// Marks every grid cell as empty by writing -1 into its cell-start slot.
__kernel void kClearCellStart(	int numCells,
								__global int* pCellStart GUID_ARG)
{
	int index = get_global_id(0);
	if(index < numCells)
	{
		pCellStart[index] = -1;
	}
}
|
||||
|
||||
// Records in cellStart[hash] the index of the first pHash entry that carries that hash.
// An entry starts a cell when it is entry 0 or when its hash differs from the previous entry's hash
// (presumably pHash has been sorted by hash before this kernel runs - confirm with the host code).
__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart GUID_ARG)
{
	// slot (local id + 1) holds this work-item's hash; slot 0 holds the hash of the
	// entry just before the work-group. NOTE(review): 513 slots looks sized for
	// work-groups of at most 512 items - confirm against the launch configuration.
	__local int sharedHash[513];
	int index = get_global_id(0);
	int localIndex = get_local_id(0);
	int inRange = (index < numObjects);
	int2 entry;
	if(inRange)
	{
		entry = pHash[index];
		// publish the hash so the next work-item can compare against it
		// without loading two hash values itself
		sharedHash[localIndex + 1] = entry.x;
		if((localIndex == 0) && (index > 0))
		{
			// first work-item of the group fetches the hash of the preceding entry
			sharedHash[0] = pHash[index-1].x;
		}
	}
	// reached by every work-item, including those past numObjects
	barrier(CLK_LOCAL_MEM_FENCE);
	if(!inRange)
	{
		return;
	}
	if((index == 0) || (entry.x != sharedHash[localIndex]))
	{
		cellStart[entry.x] = index;
	}
}
|
||||
|
||||
// Returns 1 when the two boxes [min0, max0] and [min1, max1] overlap on x, y and z
// (touching boxes count as overlapping), otherwise 0. The w components are ignored.
int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
{
	// the negated <= form keeps the result 0 when a comparison involves NaN
	if(!((min0.x <= max1.x) && (min1.x <= max0.x)))
	{
		return 0;
	}
	if(!((min0.y <= max1.y) && (min1.y <= max0.y)))
	{
		return 0;
	}
	if(!((min0.z <= max1.z) && (min1.z <= max0.z)))
	{
		return 0;
	}
	return 1;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Tests the body stored at pHash[index] against the bodies of one grid cell and
// updates that body's pair list.
//   gridPos            cell to examine (wrapped by getPosHash)
//   pCellStart         first pHash index per cell hash, or -1 when the cell is empty
//   pAABB              two float4 per body; the w of the min corner carries the handle index bits
//   pPairBuff          pair storage; each entry is a handle index plus flag bits
//   pPairBuffStartCurr per handle: x = first slot in pPairBuff, y = number of used slots
//   pParams            [1].w (as int) limits how many entries of a cell are visited
// Flag bits: 0x40000000 is OR-ed into an existing entry that was found again,
// 0x20000000 marks an entry appended by this pass.
// Only bodies with a smaller body index than the current one are tested, so each
// pair is produced once and a body is never paired with itself.
void findPairsInCell(	int numObjects,
						int4 gridPos,
						int index,
						__global int2* pHash,
						__global int* pCellStart,
						__global float4* pAABB,
						__global int* pPairBuff,
						__global int2* pPairBuffStartCurr,
						__global float4* pParams)
{
	int4 gridInfo = *((__global int4*)(pParams + 1));
	int cellCapacity = gridInfo.w;
	int cellHash = getPosHash(gridPos, pParams);
	int firstSlot = pCellStart[cellHash];
	if(firstSlot == -1)
	{
		// nothing stored in this cell
		return;
	}
	// the body this work-item is responsible for
	int2 selfEntry = pHash[index];
	int selfBody = selfEntry.y;
	float4 selfMin = pAABB[selfBody*2 + 0];
	float4 selfMax = pAABB[selfBody*2 + 1];
	int selfHandle = as_int(selfMin.w);
	// its slice of the pair buffer
	int2 slice = pPairBuffStartCurr[selfHandle];
	int start = slice.x;
	int used = slice.y;
	int2 nextSlice = pPairBuffStartCurr[selfHandle+1];
	int usedMax = nextSlice.x - start - 1;
	// never walk further than the cell capacity or the end of the hash array
	int lastSlot = firstSlot + cellCapacity;
	if(lastSlot > numObjects)
	{
		lastSlot = numObjects;
	}
	for(int slot = firstSlot; slot < lastSlot; slot++)
	{
		int2 otherEntry = pHash[slot];
		if(otherEntry.x != cellHash)
		{
			// ran into the entries of the next cell
			break;
		}
		int otherBody = otherEntry.y;
		if(otherBody >= selfBody)
		{
			// skips the body itself and pairs that the other body will report
			continue;
		}
		float4 otherMin = pAABB[otherBody*2 + 0];
		float4 otherMax = pAABB[otherBody*2 + 1];
		if(!testAABBOverlap(selfMin, selfMax, otherMin, otherMax))
		{
			continue;
		}
		int otherHandle = as_int(otherMin.w);
		// look for an existing entry for this partner, ignoring the flag bits
		int k = 0;
		while((k < used) && ((pPairBuff[start+k] & (~0x60000000)) != otherHandle))
		{
			k++;
		}
		if(k < used)
		{
			// known pair: flag it as still overlapping
			pPairBuff[start+k] |= 0x40000000;
			continue;
		}
		if(used >= usedMax)
		{
			// not a good solution, but let's avoid crash
			break;
		}
		// unknown pair: append it flagged as new
		pPairBuff[start+used] = otherHandle | 0x20000000;
		used++;
	}
	// publish the updated slot count
	int2 updated;
	updated.x = start;
	updated.y = used;
	pPairBuffStartCurr[selfHandle] = updated;
}
|
||||
|
||||
// For the body stored at pHash[get_global_id(0)], visits the 3x3x3 block of grid
// cells centred on the cell containing the body's AABB centre and lets
// findPairsInCell update the body's pair list for each of those cells.
//   numObjects          number of entries in pHash to process
//   pAABB               two float4 per body (min, max)
//   pHash               (cell hash, body index) entries
//   pCellStart          first pHash index per cell hash, -1 for empty cells
//   pPairBuff           pair storage shared by all bodies
//   pPairBuffStartCurr  per-handle (start, used) slice description
//   pParams             grid parameters consumed by getGridPos / getPosHash
__kernel void kFindOverlappingPairs(	int numObjects,
										__global float4* pAABB,
										__global int2* pHash,
										__global int* pCellStart,
										__global int* pPairBuff,
										__global int2* pPairBuffStartCurr,
										__global float4* pParams GUID_ARG)
{
	int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}
	int2 sortedData = pHash[index];
	int unsorted_indx = sortedData.y;
	float4 bbMin = pAABB[unsorted_indx*2 + 0];
	float4 bbMax = pAABB[unsorted_indx*2 + 1];
	float4 pos;
	pos.x = (bbMin.x + bbMax.x) * 0.5f;
	pos.y = (bbMin.y + bbMax.y) * 0.5f;
	pos.z = (bbMin.z + bbMax.z) * 0.5f;
	// w is unused by getGridPos but the vector is passed by value, so give it
	// a defined content (same as kCalcHashAABB does)
	pos.w = 0.f;
	// get address in grid
	int4 gridPosA = getGridPos(pos, pParams);
	int4 gridPosB;
	// w is not part of the cell coordinate; keep it defined because the
	// whole vector is handed to findPairsInCell by value
	gridPosB.w = 0;
	// examine only neighbouring cells
	for(int z=-1; z<=1; z++)
	{
		gridPosB.z = gridPosA.z + z;
		for(int y=-1; y<=1; y++)
		{
			gridPosB.y = gridPosA.y + y;
			for(int x=-1; x<=1; x++)
			{
				gridPosB.x = gridPosA.x + x;
				findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, pParams);
			}
		}
	}
}
|
||||
|
||||
|
||||
// Tests the body stored at pHash[get_global_id(0)] against every "large" body.
// The AABBs of the large bodies are read from pAABB starting at body slot
// numObjects (numLarge of them). The pair list of the small body is updated
// with the same flag convention as findPairsInCell: 0x40000000 is OR-ed into an
// existing entry that overlaps again, 0x20000000 marks a newly appended entry.
// pCellStart is part of the signature but is not used by this kernel.
__kernel void kFindPairsLarge(	int numObjects,
								__global float4* pAABB,
								__global int2* pHash,
								__global int* pCellStart,
								__global int* pPairBuff,
								__global int2* pPairBuffStartCurr,
								uint numLarge GUID_ARG)
{
	int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}
	int2 sortedData = pHash[index];
	int unsorted_indx = sortedData.y;
	float4 min0 = pAABB[unsorted_indx*2 + 0];
	float4 max0 = pAABB[unsorted_indx*2 + 1];
	// the w of the min corner carries the handle index bits
	int handleIndex = as_int(min0.w);
	int2 start_curr = pPairBuffStartCurr[handleIndex];
	int start = start_curr.x;
	int curr = start_curr.y;
	// the slice of this handle ends where the slice of the next handle starts
	int2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
	int curr_max = start_curr_next.x - start - 1;
	for(uint i = 0; i < numLarge; i++)
	{
		int indx2 = numObjects + i;
		float4 min1 = pAABB[indx2*2 + 0];
		float4 max1 = pAABB[indx2*2 + 1];
		if(testAABBOverlap(min0, max0, min1, max1))
		{
			int k;
			int handleIndex2 = as_int(min1.w);
			// search the existing entries, ignoring the flag bits
			for(k = 0; k < curr; k++)
			{
				int old_pair = pPairBuff[start+k] & (~0x60000000);
				if(old_pair == handleIndex2)
				{
					pPairBuff[start+k] |= 0x40000000;
					break;
				}
			}
			if(k == curr)
			{
				// check the capacity BEFORE storing, exactly like findPairsInCell,
				// so a full slice is never written to
				if(curr >= curr_max)
				{ // not a good solution, but let's avoid crash
					break;
				}
				pPairBuff[start+curr] = handleIndex2 | 0x20000000;
				curr++;
			}
		}
	}
	int2 newStartCurr;
	newStartCurr.x = start;
	newStartCurr.y = curr;
	pPairBuffStartCurr[handleIndex] = newStartCurr;
	return;
}
|
||||
|
||||
// Counts, for body `index`, how many entries of its pair list do NOT carry the
// 0x40000000 flag and stores that count at pPairScan[index + 1].
// The handle index of the body is read from the w bits of its AABB min corner.
__kernel void kComputePairCacheChanges(	int numObjects,
										__global int* pPairBuff,
										__global int2* pPairBuffStartCurr,
										__global int* pPairScan,
										__global float4* pAABB GUID_ARG)
{
	int index = get_global_id(0);
	if(index < numObjects)
	{
		int handle = as_int(pAABB[index * 2].w);
		int2 slice = pPairBuffStartCurr[handle];
		int first = slice.x;
		int used = slice.y;
		int changed = 0;
		for(int k = 0; k < used; k++)
		{
			if((pPairBuff[first + k] & 0x40000000) == 0)
			{
				changed++;
			}
		}
		// slot 0 of pPairScan is left untouched by this kernel
		pPairScan[index+1] = changed;
	}
}
|
||||
|
||||
// Post-processes the pair list of body `index` in a single pass:
//  - every entry without the 0x40000000 flag is copied (flags included) to
//    pPairOut, starting at offset pPairScan[index + 1];
//  - every entry that carries 0x40000000 or 0x20000000 is kept, with both flag
//    bits cleared, and packed towards the front of the body's slice;
//  - the slice's used count is set to the number of kept entries.
__kernel void kSqueezeOverlappingPairBuff(	int numObjects,
											__global int* pPairBuff,
											__global int2* pPairBuffStartCurr,
											__global int* pPairScan,
											__global int* pPairOut,
											__global float4* pAABB GUID_ARG)
{
	int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}
	int handle = as_int(pAABB[index * 2].w);
	int2 slice = pPairBuffStartCurr[handle];
	int first = slice.x;
	int used = slice.y;
	__global int* changedOut = pPairOut + pPairScan[index+1];
	int kept = 0;
	for(int k = 0; k < used; k++)
	{
		// read once: the packed write below never runs ahead of the read position
		int entry = pPairBuff[first + k];
		if((entry & 0x40000000) == 0)
		{
			*changedOut = entry;
			changedOut++;
		}
		if((entry & 0x60000000) != 0)
		{
			pPairBuff[first + kept] = entry & (~0x60000000);
			kept++;
		}
	}
	int2 updated;
	updated.x = first;
	updated.y = kept;
	pPairBuffStartCurr[handle] = updated;
}
|
||||
|
||||
|
||||
|
||||
|
||||
);
|
||||
@@ -0,0 +1,697 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
#include "LinearMath/btAlignedAllocator.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
|
||||
#include "../basic_initialize/btOpenCLUtils.h"
|
||||
|
||||
#include "bt3dGridBroadphaseOCL.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "Adl/Adl.h"
|
||||
#include <AdlPrimitives/Scan/PrefixScan.h>
|
||||
#include <AdlPrimitives/Sort/RadixSort32.h>
|
||||
#include <AdlPrimitives/Sort/RadixSort.h>
|
||||
|
||||
// When defined, the broadphase stages in this file call clFinish() after
// enqueueing their kernels.
#define ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK

// File name handed to btOpenCLUtils::compileCLProgramFromString together with the embedded source.
#define GRID_OCL_PATH "..\\..\\opencl\\3dGridBroadphase\\Shared\\bt3dGridBroadphaseOCL.cl"

// Turns the MSTRINGIFY( ... ) wrapper of the .cl file into one string literal.
#define MSTRINGIFY(A) #A

// Kernel source text, embedded at compile time.
static const char* spProgramSource =
#include "bt3dGridBroadphaseOCL.cl"

// File-scope ADL state; allocated in the constructor and released in the destructor below.
adl::PrefixScan<adl::TYPE_CL>::Data*	gData1 = 0;
adl::Buffer<unsigned int>*				m_srcClBuffer = 0;

// Key/value record with two int fields.
struct MySortData
{
	int key;
	int value;
};

adl::RadixSort32<adl::TYPE_CL>::Data*	dataC = 0;
adl::RadixSort<adl::TYPE_HOST>::Data*	dataHost = 0;

// Constant elements written into m_srcClBuffer by the constructor.
static unsigned int infElem = 0x2fffffff;
static unsigned int zeroEl = 0;
// all bits set (the value -1 converts to)
static unsigned int minusOne = ~0u;
|
||||
|
||||
|
||||
// Builds the OpenCL broadphase on top of btGpu3DGridBroadphase.
// context / device / queue are stored and used for all OpenCL work of this object.
// deviceCL may be null; in that case an adl::DeviceCL wrapping the given
// context / device / queue is created here and owned by this object (m_ownsDevice).
bt3dGridBroadphaseOCL::bt3dGridBroadphaseOCL(	btOverlappingPairCache* overlappingPairCache,
												const btVector3& cellSize,
												int gridSizeX, int gridSizeY, int gridSizeZ,
												int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
												btScalar maxSmallProxySize,
												int maxSmallProxiesPerCell,
												cl_context context, cl_device_id device, cl_command_queue queue,
												adl::DeviceCL* deviceCL
											) :
	btGpu3DGridBroadphase(overlappingPairCache, cellSize, gridSizeX, gridSizeY, gridSizeZ, maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy, maxSmallProxySize, maxSmallProxiesPerCell)
{
	// program, device buffers, initial buffer contents, kernels - in that order
	initCL(context, device, queue);
	allocateBuffers();
	prefillBuffers();
	initKernels();

	// ADL host device
	adl::DeviceUtils::Config cfg;
	m_deviceHost = adl::DeviceUtils::allocate( adl::TYPE_HOST, cfg );

	// ADL OpenCL device: use the caller's, or wrap the raw OpenCL handles ourselves
	m_ownsDevice = (deviceCL == 0);
	if (m_ownsDevice)
	{
		deviceCL = new adl::DeviceCL;
		deviceCL->m_context = context;
		deviceCL->m_deviceIdx = device;
		deviceCL->m_commandQueue = queue;
		deviceCL->m_kernelManager = new adl::KernelManager;
	}
	m_deviceCL = deviceCL;

	// the device-side scan/sort helpers are sized for at least 256K elements
	const int minSortElements = 256*1024;
	int maxSortBuffer = maxSmallProxies;
	if (maxSortBuffer < minSortElements)
	{
		maxSortBuffer = minSortElements;
	}

	// maxSmallProxies + 2 elements: element 0 and the last element are zero,
	// element maxSmallProxies holds infElem
	m_srcClBuffer = new adl::Buffer<unsigned int> (m_deviceCL,maxSmallProxies+2);
	m_srcClBuffer->write(&zeroEl,1,0);
	m_srcClBuffer->write(&infElem,1,maxSmallProxies);
	m_srcClBuffer->write(&zeroEl,1,maxSmallProxies+1);
	m_deviceCL->waitForCompletion();

	gData1 = adl::PrefixScan<adl::TYPE_CL>::allocate( m_deviceCL, maxSortBuffer+2,adl::PrefixScanBase::EXCLUSIVE );
	dataHost = adl::RadixSort<adl::TYPE_HOST>::allocate( m_deviceHost, maxSmallProxies+2 );
	dataC = adl::RadixSort32<adl::TYPE_CL>::allocate( m_deviceCL, maxSortBuffer+2 );
}
|
||||
|
||||
|
||||
|
||||
// Releases everything this class created: the ADL scan/sort helpers, the ADL
// devices, and the OpenCL kernels, program and buffers made by initKernel(),
// initCL() and allocateBuffers(). The OpenCL context, device and command queue
// were supplied by the caller and are left untouched.
bt3dGridBroadphaseOCL::~bt3dGridBroadphaseOCL()
{
	//btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache
	assert(m_bInitialized);

	// ADL helpers; the pointers are file-scope, so clear them to avoid dangling values
	adl::RadixSort<adl::TYPE_HOST>::deallocate(dataHost);
	dataHost = 0;
	adl::PrefixScan<adl::TYPE_CL>::deallocate(gData1);
	gData1 = 0;
	adl::RadixSort32<adl::TYPE_CL>::deallocate(dataC);
	dataC = 0;
	adl::DeviceUtils::deallocate(m_deviceHost);
	delete m_srcClBuffer;
	m_srcClBuffer = 0;

	// kernels created by initKernels()
	const int kernelIds[] =
	{
		GRID3DOCL_KERNEL_CALC_HASH_AABB,
		GRID3DOCL_KERNEL_CLEAR_CELL_START,
		GRID3DOCL_KERNEL_FIND_CELL_START,
		GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS,
		GRID3DOCL_KERNEL_FIND_PAIRS_LARGE,
		GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES,
		GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF
	};
	const int numKernelIds = (int)(sizeof(kernelIds) / sizeof(kernelIds[0]));
	for (int i = 0; i < numKernelIds; i++)
	{
		cl_kernel kernel = m_kernels[kernelIds[i]].m_kernel;
		if (kernel)
		{
			clReleaseKernel(kernel);
			m_kernels[kernelIds[i]].m_kernel = 0;
		}
	}

	// program compiled by initCL()
	if (m_cpProgram)
	{
		clReleaseProgram(m_cpProgram);
		m_cpProgram = 0;
	}

	// device buffers created by allocateBuffers()
	cl_mem* deviceBuffers[] =
	{
		&m_dBodiesHash,
		&m_dCellStart,
		&m_dPairBuff,
		&m_dPairBuffStartCurr,
		&m_dAABB,
		&m_dPairScanChanged,
		&m_dPairsChanged,
		&m_dPairsContiguous,
		&m_dBpParams
	};
	const int numDeviceBuffers = (int)(sizeof(deviceBuffers) / sizeof(deviceBuffers[0]));
	for (int i = 0; i < numDeviceBuffers; i++)
	{
		if (*deviceBuffers[i])
		{
			clReleaseMemObject(*deviceBuffers[i]);
			*deviceBuffers[i] = 0;
		}
	}

	// the ADL OpenCL device is only ours when the constructor had to create it
	if (m_ownsDevice)
	{
		delete m_deviceCL->m_kernelManager;
		delete m_deviceCL;
	}
}
|
||||
|
||||
#ifdef CL_PLATFORM_MINI_CL
// MSVC9 issue: static constructors are not run for variables that are defined
// in a library but never referenced (apparently an optimization; other
// compilers will probably behave the same way).
// To be robust the kernels are registered again in initCL(), which is safe.
// The forward declarations below make the kernel functions visible for that.
#define MINICL_DECLARE(a) extern "C" void a();
	MINICL_DECLARE(kCalcHashAABB)
	MINICL_DECLARE(kClearCellStart)
	MINICL_DECLARE(kFindCellStart)
	MINICL_DECLARE(kFindOverlappingPairs)
	MINICL_DECLARE(kFindPairsLarge)
	MINICL_DECLARE(kComputePairCacheChanges)
	MINICL_DECLARE(kSqueezeOverlappingPairBuff)
#undef MINICL_DECLARE
#endif
|
||||
|
||||
// Stores the caller-supplied OpenCL context, device and command queue and
// compiles the embedded kernel source into m_cpProgram.
// On the MiniCL platform the kernels are registered first.
// A failed compilation (null program) is reported on stdout and asserted on;
// "OK" is printed only when a program was actually obtained.
void bt3dGridBroadphaseOCL::initCL(cl_context context, cl_device_id device, cl_command_queue queue)
{

#ifdef CL_PLATFORM_MINI_CL
	// call constructors here
	MINICL_REGISTER(kCalcHashAABB)
	MINICL_REGISTER(kClearCellStart)
	MINICL_REGISTER(kFindCellStart)
	MINICL_REGISTER(kFindOverlappingPairs)
	MINICL_REGISTER(kFindPairsLarge)
	MINICL_REGISTER(kComputePairCacheChanges)
	MINICL_REGISTER(kSqueezeOverlappingPairBuff)
#endif

	// initialised so that the value printed below is defined even if the
	// compile helper does not write to it on every path
	cl_int ciErrNum = CL_SUCCESS;

	btAssert(context);
	m_cxMainContext = context;
	btAssert(device);
	m_cdDevice = device;
	btAssert(queue);
	m_cqCommandQue = queue;

	// GUID_ARG is defined as empty for the kernel signatures
	m_cpProgram = btOpenCLUtils::compileCLProgramFromString(m_cxMainContext,m_cdDevice,spProgramSource, &ciErrNum,"-DGUID_ARG=""""",GRID_OCL_PATH);

	if (!m_cpProgram)
	{
		// without a program every later clCreateKernel call would fail
		printf("bt3dGridBroadphaseOCL: failed to compile the OpenCL program (error %d)\n", (int)ciErrNum);
		btAssert(m_cpProgram);
		return;
	}

	printf("OK\n");
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::initKernels()
|
||||
{
|
||||
initKernel(GRID3DOCL_KERNEL_CALC_HASH_AABB, "kCalcHashAABB");
|
||||
setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 1, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 2, sizeof(cl_mem),(void*)&m_dBodiesHash);
|
||||
setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 3, sizeof(cl_mem),(void*)&m_dBpParams);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_CLEAR_CELL_START, "kClearCellStart");
|
||||
setKernelArg(GRID3DOCL_KERNEL_CLEAR_CELL_START, 1, sizeof(cl_mem),(void*)&m_dCellStart);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_FIND_CELL_START, "kFindCellStart");
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_CELL_START, 1, sizeof(cl_mem),(void*)&m_dBodiesHash);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_CELL_START, 2, sizeof(cl_mem),(void*)&m_dCellStart);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, "kFindOverlappingPairs");
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 1, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 2, sizeof(cl_mem),(void*)&m_dBodiesHash);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 3, sizeof(cl_mem),(void*)&m_dCellStart);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 4, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 5, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 6, sizeof(cl_mem),(void*)&m_dBpParams);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, "kFindPairsLarge");
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 1, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 2, sizeof(cl_mem),(void*)&m_dBodiesHash);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 3, sizeof(cl_mem),(void*)&m_dCellStart);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 4, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 5, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, "kComputePairCacheChanges");
|
||||
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
|
||||
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 4, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, "kSqueezeOverlappingPairBuff");
|
||||
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
|
||||
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 4, sizeof(cl_mem),(void*)&m_dPairsChanged);
|
||||
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 5, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::allocateBuffers()
|
||||
{
|
||||
cl_int ciErrNum;
|
||||
unsigned int memSize;
|
||||
// current version of bitonic sort works for power of 2 arrays only, so ...
|
||||
m_hashSize = 1;
|
||||
for(int bit = 1; bit < 32; bit++)
|
||||
{
|
||||
if(m_hashSize >= m_maxHandles)
|
||||
{
|
||||
break;
|
||||
}
|
||||
m_hashSize <<= 1;
|
||||
}
|
||||
memSize = m_hashSize * 2 * sizeof(unsigned int);
|
||||
if (memSize < 1024*1024)
|
||||
memSize = 1024*1024;
|
||||
|
||||
m_dBodiesHash = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = m_numCells * sizeof(unsigned int);
|
||||
m_dCellStart = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
|
||||
m_dPairBuff = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = (m_maxHandles * 2 + 1) * sizeof(unsigned int);
|
||||
m_dPairBuffStartCurr = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
|
||||
memSize = numAABB * sizeof(float) * 4 * 2;
|
||||
m_dAABB = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = (m_maxHandles + 2) * sizeof(unsigned int);
|
||||
m_dPairScanChanged = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
|
||||
m_dPairsChanged = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
m_dPairsContiguous = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = 3 * 4 * sizeof(float);
|
||||
m_dBpParams = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
|
||||
void bt3dGridBroadphaseOCL::prefillBuffers()
|
||||
{
|
||||
memset(m_hBodiesHash, 0xFF, m_maxHandles*2*sizeof(unsigned int));
|
||||
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_maxHandles * 2 * sizeof(unsigned int));
|
||||
// now fill the rest (bitonic sorting works with size == pow of 2)
|
||||
int remainder = m_hashSize - m_maxHandles;
|
||||
if(remainder)
|
||||
{
|
||||
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, remainder * 2 * sizeof(unsigned int), m_maxHandles * 2 * sizeof(unsigned int), 0);
|
||||
}
|
||||
copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
|
||||
memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
|
||||
copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
|
||||
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::initKernel(int kernelId, char* pName)
|
||||
{
|
||||
|
||||
cl_int ciErrNum;
|
||||
cl_kernel kernel = clCreateKernel(m_cpProgram, pName, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
size_t wgSize;
|
||||
ciErrNum = clGetKernelWorkGroupInfo(kernel, m_cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
m_kernels[kernelId].m_Id = kernelId;
|
||||
m_kernels[kernelId].m_kernel = kernel;
|
||||
m_kernels[kernelId].m_name = pName;
|
||||
m_kernels[kernelId].m_workgroupSize = (int)wgSize;
|
||||
return;
|
||||
}
|
||||
|
||||
void bt3dGridBroadphaseOCL::runKernelWithWorkgroupSize(int kernelId, int globalSize)
|
||||
{
|
||||
if(globalSize <= 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
cl_kernel kernelFunc = m_kernels[kernelId].m_kernel;
|
||||
cl_int ciErrNum = clSetKernelArg(kernelFunc, 0, sizeof(int), (void*)&globalSize);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
int workgroupSize = btMin(64,m_kernels[kernelId].m_workgroupSize);
|
||||
|
||||
if(workgroupSize <= 0)
|
||||
{ // let OpenCL library calculate workgroup size
|
||||
size_t globalWorkSize[2];
|
||||
globalWorkSize[0] = globalSize;
|
||||
globalWorkSize[1] = 1;
|
||||
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, NULL, 0,0,0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t localWorkSize[2], globalWorkSize[2];
|
||||
//workgroupSize = btMin(workgroupSize, globalSize);
|
||||
int num_t = globalSize / workgroupSize;
|
||||
int num_g = num_t * workgroupSize;
|
||||
if(num_g < globalSize)
|
||||
{
|
||||
num_t++;
|
||||
}
|
||||
localWorkSize[0] = workgroupSize;
|
||||
globalWorkSize[0] = num_t * workgroupSize;
|
||||
localWorkSize[1] = 1;
|
||||
globalWorkSize[1] = 1;
|
||||
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, localWorkSize, 0,0,0 );
|
||||
}
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
ciErrNum = clFlush(m_cqCommandQue);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::setKernelArg(int kernelId, int argNum, int argSize, void* argPtr)
|
||||
{
|
||||
cl_int ciErrNum;
|
||||
ciErrNum = clSetKernelArg(m_kernels[kernelId].m_kernel, argNum, argSize, argPtr);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
// Blocking upload of `size` bytes from host memory (starting hostOffs bytes
// into `host`) to the device buffer (starting devOffs bytes into `device`).
// A size of zero is a no-op.
void bt3dGridBroadphaseOCL::copyArrayToDevice(cl_mem device, const void* host, unsigned int size, int devOffs, int hostOffs)
{
	if (!size)
	{
		return;
	}
	const char* source = static_cast<const char*>(host) + hostOffs;
	// CL_TRUE: the call returns only after the data has been copied
	cl_int status = clEnqueueWriteBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, source, 0, NULL, NULL);
	GRID3DOCL_CHECKERROR(status, CL_SUCCESS);
}
|
||||
|
||||
void bt3dGridBroadphaseOCL::copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs, int devOffs)
|
||||
{
|
||||
if (size)
|
||||
{
|
||||
cl_int ciErrNum;
|
||||
char* pHost = (char*)host + hostOffs;
|
||||
ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, pHost, 0, NULL, NULL);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// overrides
|
||||
//
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::prepareAABB()
|
||||
{
|
||||
btGpu3DGridBroadphase::prepareAABB();
|
||||
copyArrayToDevice(m_dAABB, m_hAABB, sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles));
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards hostParams to the base class and uploads the subset the kernels
// read to m_dBpParams: four floats (inverse cell size x, y, z and a zero)
// followed by four ints (grid size x, y, z and the maximum bodies per cell).
void bt3dGridBroadphaseOCL::setParameters(bt3DGridBroadphaseParams* hostParams)
{
	btGpu3DGridBroadphase::setParameters(hostParams);

	// device-side layout; matches how the kernels index pParams[0] and pParams[1]
	struct btParamsBpOCL
	{
		float m_invCellSize[4];
		int m_gridSize[4];
	};

	btParamsBpOCL deviceParams;

	float* invCell = deviceParams.m_invCellSize;
	invCell[0] = m_params.m_invCellSizeX;
	invCell[1] = m_params.m_invCellSizeY;
	invCell[2] = m_params.m_invCellSizeZ;
	invCell[3] = 0.f;

	int* grid = deviceParams.m_gridSize;
	grid[0] = m_params.m_gridSizeX;
	grid[1] = m_params.m_gridSizeY;
	grid[2] = m_params.m_gridSizeZ;
	// the fourth component carries the per-cell body limit
	grid[3] = m_params.m_maxBodiesPerCell;

	copyArrayToDevice(m_dBpParams, &deviceParams, sizeof(btParamsBpOCL));
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::calcHashAABB()
|
||||
{
|
||||
BT_PROFILE("calcHashAABB");
|
||||
#if 1
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_CALC_HASH_AABB, m_numHandles);
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::calcHashAABB();
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::sortHash()
|
||||
{
|
||||
BT_PROFILE("sortHash");
|
||||
#ifdef CL_PLATFORM_MINI_CL
|
||||
//copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
|
||||
btGpu3DGridBroadphase::sortHash();
|
||||
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
|
||||
#else
|
||||
|
||||
//#define USE_HOST
|
||||
#ifdef USE_HOST
|
||||
copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
|
||||
//adl::Buffer<unsigned int> keysIn,keysOut,valuesIn,valuesOut;
|
||||
///adl::RadixSort32<adl::TYPE_CL>::execute(dataC,keysIn,keysOut,valuesIn,valuesOut,m_numHandles);
|
||||
adl::HostBuffer<adl::SortData> inoutHost;
|
||||
inoutHost.m_device = m_deviceHost;
|
||||
inoutHost.m_ptr = (adl::SortData*)m_hBodiesHash;
|
||||
inoutHost.m_size = m_numHandles;
|
||||
adl::RadixSort<adl::TYPE_HOST>::execute(dataHost, inoutHost,m_numHandles);
|
||||
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
|
||||
#else
|
||||
{
|
||||
clFinish(m_cqCommandQue);
|
||||
BT_PROFILE("RadixSort32::execute");
|
||||
adl::Buffer<adl::SortData> inout;
|
||||
inout.m_device = this->m_deviceCL;
|
||||
inout.m_size = m_numHandles;
|
||||
inout.m_ptr = (adl::SortData*)m_dBodiesHash;
|
||||
int actualHandles = m_numHandles;
|
||||
int dataAlignment = adl::RadixSort32<adl::TYPE_CL>::DATA_ALIGNMENT;
|
||||
|
||||
if (actualHandles%dataAlignment)
|
||||
{
|
||||
actualHandles += dataAlignment-(actualHandles%dataAlignment);
|
||||
}
|
||||
|
||||
adl::RadixSort32<adl::TYPE_CL>::execute(dataC,inout, actualHandles);
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
}
|
||||
{
|
||||
//BT_PROFILE("copyArrayFromDevice");
|
||||
//copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
}
|
||||
|
||||
|
||||
#endif //USE_HOST
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::findCellStart()
|
||||
{
|
||||
#if 1
|
||||
BT_PROFILE("findCellStart");
|
||||
|
||||
#if defined(CL_PLATFORM_MINI_CL)
|
||||
btGpu3DGridBroadphase::findCellStart();
|
||||
copyArrayToDevice(m_dCellStart, m_hCellStart, m_numCells * sizeof(unsigned int));
|
||||
#else
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_CLEAR_CELL_START, m_numCells);
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_CELL_START, m_numHandles);
|
||||
#endif
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::findCellStart();
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::findOverlappingPairs()
|
||||
{
|
||||
#if 1
|
||||
BT_PROFILE("findOverlappingPairs");
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, m_numHandles);
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::findOverlappingPairs();
|
||||
copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
|
||||
copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::findPairsLarge()
|
||||
{
|
||||
BT_PROFILE("findPairsLarge");
|
||||
#if 1
|
||||
if(m_numLargeHandles)
|
||||
{
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 6, sizeof(int),(void*)&m_numLargeHandles);
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, m_numHandles);
|
||||
}
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::findPairsLarge();
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::computePairCacheChanges()
|
||||
{
|
||||
BT_PROFILE("computePairCacheChanges");
|
||||
#if 1
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, m_numHandles);
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
copyArrayFromDevice( m_hPairScanChanged,m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::computePairCacheChanges();
|
||||
copyArrayToDevice(m_dPairScanChanged, m_hPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
extern cl_device_type deviceType;
|
||||
|
||||
void bt3dGridBroadphaseOCL::scanOverlappingPairBuff(bool copyToCpu)
|
||||
{
|
||||
|
||||
//Intel/CPU version doesn't handlel Adl scan well
|
||||
#if 0
|
||||
{
|
||||
copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
btGpu3DGridBroadphase::scanOverlappingPairBuff();
|
||||
copyArrayToDevice(m_dPairScanChanged, m_hPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
m_numPrefixSum = m_hPairScanChanged[m_numHandles+1];
|
||||
clFinish(m_cqCommandQue);
|
||||
//memset(m_hPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
|
||||
}
|
||||
#else
|
||||
{
|
||||
|
||||
// copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
// btGpu3DGridBroadphase::scanOverlappingPairBuff();
|
||||
|
||||
adl::Buffer<unsigned int> destBuffer;
|
||||
|
||||
{
|
||||
BT_PROFILE("copy GPU->GPU");
|
||||
|
||||
destBuffer.m_ptr = (unsigned int*)m_dPairScanChanged;
|
||||
destBuffer.m_device = m_deviceCL;
|
||||
destBuffer.m_size = sizeof(unsigned int)*(m_numHandles+2);
|
||||
m_deviceCL->copy(m_srcClBuffer, &destBuffer,m_numHandles,1,1);
|
||||
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("PrefixScan");
|
||||
|
||||
adl::PrefixScan<adl::TYPE_CL>::execute(gData1,*m_srcClBuffer,destBuffer, m_numHandles+2,&m_numPrefixSum);
|
||||
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
//if (m_numPrefixSum>0x1000)
|
||||
// {
|
||||
// printf("error m_numPrefixSum==%d\n",m_numPrefixSum);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
#if 0
|
||||
unsigned int* verifyhPairScanChanged = new unsigned int[m_maxHandles + 2];
|
||||
memset(verifyhPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
|
||||
|
||||
copyArrayFromDevice(verifyhPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
clFinish(m_cqCommandQue);
|
||||
|
||||
/*for (int i=0;i<m_numHandles+2;i++)
|
||||
{
|
||||
if (verifyhPairScanChanged[i] != m_hPairScanChanged[i])
|
||||
{
|
||||
printf("hello!\n");
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
if (1)
|
||||
{
|
||||
|
||||
//the data
|
||||
if (copyToCpu)
|
||||
{
|
||||
BT_PROFILE("copy GPU -> CPU");
|
||||
copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff()
|
||||
{
|
||||
BT_PROFILE("btCuda_squeezeOverlappingPairBuff");
|
||||
#if 1
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, m_numHandles);
|
||||
// btCuda_squeezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScanChanged, m_dPairsChanged, m_dAABB, m_numHandles);
|
||||
|
||||
//copyArrayFromDevice(m_hPairsChanged, m_dPairsChanged, sizeof(unsigned int) * m_numPrefixSum);//m_hPairScanChanged[m_numHandles+1]); //gSum
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::squeezeOverlappingPairBuff();
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Resets the host-side pool through the base class, then calls
/// prefillBuffers().
void bt3dGridBroadphaseOCL::resetPool(btDispatcher* dispatcher)
{
	// Host state first, so prefillBuffers() runs after the reset.
	btGpu3DGridBroadphase::resetPool(dispatcher);
	prefillBuffers();
}
|
||||
|
||||
|
||||
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef BT3DGRIDBROADPHASEOCL_H
|
||||
#define BT3DGRIDBROADPHASEOCL_H
|
||||
|
||||
// OpenCL API header selection.
//  - USE_MINICL: Bullet's MiniCL emulation layer, on every platform.
//  - __APPLE__:  the header of the OpenCL framework. The original listed
//                <MiniCL/cl.h> in both Apple branches, so a non-MiniCL Apple
//                build never saw the native header.
//  - otherwise:  the Khronos header.
//CL_PLATFORM_MINI_CL could be defined in build system
#if defined(USE_MINICL)
#include <MiniCL/cl.h>
#elif defined(__APPLE__)
#include <OpenCL/cl.h>
#else
//#include <GL/glew.h>
#include <CL/cl.h>
// Extra CL/GL include
//#include <CL/cl_gl.h>
#endif //USE_MINICL / __APPLE__
|
||||
|
||||
// Forward declarations of the adl device types: this header only stores
// pointers to them.
namespace adl
{
	struct Device;
	struct DeviceCL;
}
|
||||
|
||||
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
|
||||
#include "btGpu3DGridBroadphaseSharedTypes.h"
|
||||
#include "btGpu3DGridBroadphase.h"
|
||||
|
||||
|
||||
/// Compares an OpenCL status code `a` with the expected value `b`; on a
/// mismatch prints the actual code and triggers btAssert.
/// Both arguments are evaluated exactly once (the original evaluated `a` up to
/// three times), and the do/while(0) wrapper makes the macro behave as a single
/// statement, e.g. in an unbraced if/else. Use it with a trailing semicolon.
#define GRID3DOCL_CHECKERROR(a, b) \
	do \
	{ \
		const int grid3dOclActual_ = (int)(a); \
		const int grid3dOclExpected_ = (int)(b); \
		if (grid3dOclActual_ != grid3dOclExpected_) \
		{ \
			printf("3D GRID OCL Error : %d\n", grid3dOclActual_); \
			btAssert(grid3dOclActual_ == grid3dOclExpected_); \
		} \
	} while (0)
|
||||
|
||||
/// Indices into bt3dGridBroadphaseOCL::m_kernels, one per OpenCL kernel.
/// GRID3DOCL_KERNEL_TOTAL is the number of kernels and must stay last.
enum
{
	GRID3DOCL_KERNEL_CALC_HASH_AABB			= 0,
	GRID3DOCL_KERNEL_CLEAR_CELL_START		= 1,
	GRID3DOCL_KERNEL_FIND_CELL_START		= 2,
	GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS	= 3,
	GRID3DOCL_KERNEL_FIND_PAIRS_LARGE		= 4,
	GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES	= 5,
	GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF		= 6,
	GRID3DOCL_KERNEL_TOTAL					= 7
};
|
||||
|
||||
struct bt3dGridOCLKernelInfo
|
||||
{
|
||||
int m_Id;
|
||||
cl_kernel m_kernel;
|
||||
char* m_name;
|
||||
int m_workgroupSize;
|
||||
};
|
||||
|
||||
|
||||
///The bt3dGridBroadphaseOCL uses OpenCL-capable GPU to compute overlapping pairs
|
||||
|
||||
class bt3dGridBroadphaseOCL : public btGpu3DGridBroadphase
|
||||
{
|
||||
protected:
|
||||
int m_hashSize;
|
||||
cl_context m_cxMainContext;
|
||||
cl_device_id m_cdDevice;
|
||||
cl_command_queue m_cqCommandQue;
|
||||
cl_program m_cpProgram;
|
||||
bt3dGridOCLKernelInfo m_kernels[GRID3DOCL_KERNEL_TOTAL];
|
||||
// data buffers
|
||||
cl_mem m_dBodiesHash;
|
||||
cl_mem m_dCellStart;
|
||||
cl_mem m_dPairBuff;
|
||||
cl_mem m_dPairBuffStartCurr;
|
||||
public:
|
||||
cl_mem m_dAABB;
|
||||
protected:
|
||||
cl_mem m_dPairScanChanged;
|
||||
cl_mem m_dPairsChanged;
|
||||
cl_mem m_dPairsContiguous;
|
||||
cl_mem m_dBpParams;
|
||||
|
||||
adl::Device* m_deviceHost;
|
||||
adl::DeviceCL* m_deviceCL;
|
||||
bool m_ownsDevice;
|
||||
|
||||
|
||||
public:
|
||||
unsigned int m_numPrefixSum;
|
||||
|
||||
bt3dGridBroadphaseOCL( btOverlappingPairCache* overlappingPairCache,
|
||||
const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxSmallProxiesPerCell = 8,
|
||||
cl_context context = NULL,
|
||||
cl_device_id device = NULL,
|
||||
cl_command_queue queue = NULL,
|
||||
adl::DeviceCL* deviceCL = 0
|
||||
);
|
||||
virtual ~bt3dGridBroadphaseOCL();
|
||||
|
||||
protected:
|
||||
void initCL(cl_context context, cl_device_id device, cl_command_queue queue);
|
||||
void initKernels();
|
||||
void allocateBuffers();
|
||||
void prefillBuffers();
|
||||
void initKernel(int kernelId, char* pName);
|
||||
void allocateArray(void** devPtr, unsigned int size);
|
||||
void freeArray(void* devPtr);
|
||||
void runKernelWithWorkgroupSize(int kernelId, int globalSize);
|
||||
void setKernelArg(int kernelId, int argNum, int argSize, void* argPtr);
|
||||
void copyArrayToDevice(cl_mem device, const void* host, unsigned int size, int devOffs = 0, int hostOffs = 0);
|
||||
void copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs = 0, int devOffs = 0);
|
||||
|
||||
// overrides
|
||||
virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
|
||||
virtual void prepareAABB();
|
||||
virtual void calcHashAABB();
|
||||
virtual void sortHash();
|
||||
virtual void findCellStart();
|
||||
virtual void findOverlappingPairs();
|
||||
virtual void findPairsLarge();
|
||||
virtual void computePairCacheChanges();
|
||||
virtual void scanOverlappingPairBuff(bool copyToCpu=true);
|
||||
virtual void squeezeOverlappingPairBuff();
|
||||
virtual void resetPool(btDispatcher* dispatcher);
|
||||
};
|
||||
|
||||
#endif //BT3DGRIDBROADPHASEOCL_H
|
||||
@@ -0,0 +1,626 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
///The 3 following lines include the CPU implementation of the kernels, keep them in this order.
|
||||
#include "btGpuDefines.h"
|
||||
#include "btGpuUtilsSharedDefs.h"
|
||||
#include "btGpuUtilsSharedCode.h"
|
||||
|
||||
|
||||
|
||||
#include "LinearMath/btAlignedAllocator.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
|
||||
|
||||
|
||||
|
||||
#include "btGpuDefines.h"
|
||||
#include "btGpuUtilsSharedDefs.h"
|
||||
|
||||
#include "btGpu3DGridBroadphaseSharedDefs.h"
|
||||
|
||||
#include "btGpu3DGridBroadphase.h"
|
||||
#include <string.h> //for memset
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
|
||||
static bt3DGridBroadphaseParams s3DGridBroadphaseParams;
|
||||
|
||||
|
||||
|
||||
btGpu3DGridBroadphase::btGpu3DGridBroadphase( const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxBodiesPerCell) :
|
||||
btSimpleBroadphase(maxSmallProxies,
|
||||
// new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache),
|
||||
new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache),
|
||||
m_bInitialized(false),
|
||||
m_numBodies(0)
|
||||
{
|
||||
_initialize(cellSize, gridSizeX, gridSizeY, gridSizeZ,
|
||||
maxSmallProxies, maxLargeProxies, maxPairsPerBody,
|
||||
maxSmallProxySize, maxBodiesPerCell);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Constructs a broadphase on top of a caller-supplied pair cache. The cache
/// is handed to btSimpleBroadphase; all grid parameters are forwarded to
/// _initialize().
btGpu3DGridBroadphase::btGpu3DGridBroadphase(	btOverlappingPairCache* overlappingPairCache,
												const btVector3& cellSize,
												int gridSizeX, int gridSizeY, int gridSizeZ,
												int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
												btScalar maxSmallProxySize,
												int maxBodiesPerCell)
	: btSimpleBroadphase(maxSmallProxies, overlappingPairCache),
	  m_bInitialized(false),
	  m_numBodies(0)
{
	_initialize(cellSize,
				gridSizeX, gridSizeY, gridSizeZ,
				maxSmallProxies, maxLargeProxies, maxPairsPerBody,
				maxSmallProxySize,
				maxBodiesPerCell);
}
|
||||
|
||||
|
||||
|
||||
/// Releases the host arrays and the large-proxy pool via _finalize().
/// The pair cache is not touched here: per the original note it is released by
/// btSimpleBroadphase because m_ownsPairCache is set.
btGpu3DGridBroadphase::~btGpu3DGridBroadphase()
{
	assert(m_bInitialized);
	_finalize();
}
|
||||
|
||||
// returns 2^n : 2^(n+1) > val >= 2^n
|
||||
int btGpu3DGridBroadphase::getFloorPowOfTwo(int val)
|
||||
{
|
||||
int mask = 0x40000000;
|
||||
for(int k = 0; k < 30; k++, mask >>= 1)
|
||||
{
|
||||
if(mask & val)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::_initialize( const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxBodiesPerCell)
|
||||
{
|
||||
// set various paramerers
|
||||
m_ownsPairCache = true;
|
||||
m_params.m_gridSizeX = getFloorPowOfTwo(gridSizeX);
|
||||
m_params.m_gridSizeY = getFloorPowOfTwo(gridSizeY);
|
||||
m_params.m_gridSizeZ = getFloorPowOfTwo(gridSizeZ);
|
||||
m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ;
|
||||
m_numCells = m_params.m_numCells;
|
||||
m_params.m_invCellSizeX = btScalar(1.f) / cellSize[0];
|
||||
m_params.m_invCellSizeY = btScalar(1.f) / cellSize[1];
|
||||
m_params.m_invCellSizeZ = btScalar(1.f) / cellSize[2];
|
||||
m_maxRadius = maxSmallProxySize * btScalar(0.5f);
|
||||
m_params.m_numBodies = m_numBodies;
|
||||
m_params.m_maxBodiesPerCell = maxBodiesPerCell;
|
||||
|
||||
m_numLargeHandles = 0;
|
||||
m_maxLargeHandles = maxLargeProxies;
|
||||
|
||||
m_maxPairsPerBody = maxPairsPerBody;
|
||||
|
||||
m_LastLargeHandleIndex = -1;
|
||||
|
||||
assert(!m_bInitialized);
|
||||
|
||||
// allocate host storage
|
||||
m_hBodiesHash = new unsigned int[m_maxHandles * 2];
|
||||
memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int));
|
||||
|
||||
m_hCellStart = new unsigned int[m_params.m_numCells];
|
||||
memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int));
|
||||
|
||||
m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2];
|
||||
// --------------- for now, init with m_maxPairsPerBody for each body
|
||||
m_hPairBuffStartCurr[0] = 0;
|
||||
m_hPairBuffStartCurr[1] = 0;
|
||||
for(int i = 1; i <= m_maxHandles; i++)
|
||||
{
|
||||
m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody;
|
||||
m_hPairBuffStartCurr[i * 2 + 1] = 0;
|
||||
}
|
||||
//----------------
|
||||
unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
|
||||
m_hAABB = new bt3DGrid3F1U[numAABB * 2]; // AABB Min & Max
|
||||
|
||||
m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody];
|
||||
memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed?
|
||||
|
||||
m_hPairScanChanged = new unsigned int[m_maxHandles + 2];
|
||||
memset(m_hPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
|
||||
|
||||
m_hPairsChanged = new unsigned int[m_maxHandles * m_maxPairsPerBody];
|
||||
memset(m_hPairsChanged,0,sizeof(int)*(m_maxHandles * m_maxPairsPerBody));
|
||||
|
||||
m_hAllOverlappingPairs= new MyUint2[m_maxHandles * m_maxPairsPerBody];
|
||||
memset(m_hAllOverlappingPairs,0,sizeof(MyUint2)*(m_maxHandles * m_maxPairsPerBody));
|
||||
|
||||
|
||||
// large proxies
|
||||
|
||||
// allocate handles buffer and put all handles on free list
|
||||
m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16);
|
||||
m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles];
|
||||
m_firstFreeLargeHandle = 0;
|
||||
{
|
||||
for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++)
|
||||
{
|
||||
m_pLargeHandles[i].SetNextFree(i + 1);
|
||||
m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i;
|
||||
}
|
||||
m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0);
|
||||
}
|
||||
|
||||
// debug data
|
||||
m_numPairsAdded = 0;
|
||||
m_numOverflows = 0;
|
||||
|
||||
|
||||
m_bInitialized = true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::_finalize()
|
||||
{
|
||||
assert(m_bInitialized);
|
||||
delete [] m_hBodiesHash;
|
||||
delete [] m_hCellStart;
|
||||
delete [] m_hPairBuffStartCurr;
|
||||
delete [] m_hAABB;
|
||||
delete [] m_hPairBuff;
|
||||
delete [] m_hPairScanChanged;
|
||||
delete [] m_hPairsChanged;
|
||||
delete [] m_hAllOverlappingPairs;
|
||||
btAlignedFree(m_pLargeHandlesRawPtr);
|
||||
m_bInitialized = false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Runs one full broadphase update.
///
/// After the btSimpleBroadphase update, the virtual pipeline stages are
/// executed in a fixed order, each inside its own profiling scope. When there
/// are no small proxies only the large/large test is performed.
void btGpu3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
{
	btSimpleBroadphase::calculateOverlappingPairs(dispatcher);

	// Nothing in the grid: only large proxies can overlap each other.
	if (m_numHandles <= 0)
	{
		BT_PROFILE("addLarge2LargePairsToCache");
		addLarge2LargePairsToCache(dispatcher);
		return;
	}

	{	// update constants
		BT_PROFILE("setParameters");
		setParameters(&m_params);
	}
	{	// prepare AABB array
		BT_PROFILE("prepareAABB");
		prepareAABB();
	}
	{	// calculate hash
		BT_PROFILE("calcHashAABB");
		calcHashAABB();
	}
	{	// sort bodies based on hash
		BT_PROFILE("sortHash");
		sortHash();
	}
	{	// find start of each cell
		BT_PROFILE("findCellStart");
		findCellStart();
	}
	{	// findOverlappingPairs (small/small)
		BT_PROFILE("findOverlappingPairs");
		findOverlappingPairs();
	}
	{	// findOverlappingPairs (small/large)
		BT_PROFILE("findPairsLarge");
		findPairsLarge();
	}
	{	// determine which pairs changed
		BT_PROFILE("computePairCacheChanges");
		computePairCacheChanges();
	}
	{
		BT_PROFILE("scanOverlappingPairBuff");
		scanOverlappingPairBuff();
	}
	{
		BT_PROFILE("squeezeOverlappingPairBuff");
		squeezeOverlappingPairBuff();
	}
	{	// add pairs to CPU cache
		BT_PROFILE("addPairsToCache");
		addPairsToCache(dispatcher);
	}
	{	// find and add large/large pairs to CPU cache
		BT_PROFILE("addLarge2LargePairsToCache");
		addLarge2LargePairsToCache(dispatcher);
	}
}
|
||||
|
||||
|
||||
|
||||
/// Applies the pair changes found by the grid stages to the pair cache.
///
/// For handle i the changed entries are
/// m_hPairsChanged[m_hPairScanChanged[i+1] .. m_hPairScanChanged[i+2]).
/// Each entry holds the index of the other proxy plus flag bits: entries with
/// BT_3DGRID_PAIR_NEW_FLG are added to the cache, all others are removed.
/// m_numPairsAdded / m_numPairsRemoved count the operations of this call.
void btGpu3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher)
{
	m_numPairsAdded = 0;
	m_numPairsRemoved = 0;

	for (int handle = 0; handle < m_numHandles; ++handle)
	{
		const unsigned int count = m_hPairScanChanged[handle + 2] - m_hPairScanChanged[handle + 1];
		if (count == 0)
		{
			continue;
		}

		unsigned int* changes = m_hPairsChanged + m_hPairScanChanged[handle + 1];
		// The min-corner entry of the AABB pair carries the proxy index.
		const unsigned int ownIndex = m_hAABB[handle * 2].uw;
		btSimpleBroadphaseProxy* proxy0 = &m_pHandles[ownIndex];

		for (unsigned int k = 0; k < count; ++k)
		{
			const unsigned int flagged = changes[k];
			unsigned int otherIndex = flagged & (~BT_3DGRID_PAIR_ANY_FLG);

			// Indices at or above m_maxHandles refer to the large-proxy pool.
			btSimpleBroadphaseProxy* proxy1;
			if (otherIndex < (unsigned int)m_maxHandles)
			{
				proxy1 = &m_pHandles[otherIndex];
			}
			else
			{
				otherIndex -= m_maxHandles;
				btAssert(otherIndex < (unsigned int)m_maxLargeHandles);
				proxy1 = &m_pLargeHandles[otherIndex];
			}

			if (flagged & BT_3DGRID_PAIR_NEW_FLG)
			{
				m_pairCache->addOverlappingPair(proxy0, proxy1);
				m_numPairsAdded++;
			}
			else
			{
				m_pairCache->removeOverlappingPair(proxy0, proxy1, dispatcher);
				m_numPairsRemoved++;
			}
		}
	}
}
|
||||
|
||||
|
||||
|
||||
/// Creates a proxy for the given AABB.
///
/// Proxies classified as large by isLargeProxy() are constructed in the
/// large-proxy pool; everything else is delegated to btSimpleBroadphase.
///
/// @return the new proxy, or 0 when the large-proxy pool is exhausted
///         (asserted in debug builds).
btBroadphaseProxy* btGpu3DGridBroadphase::createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy)
{
	if (!isLargeProxy(aabbMin, aabbMax))
	{
		return btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy);
	}

	if (m_numLargeHandles >= m_maxLargeHandles)
	{
		///you have to increase the cell size, so 'large' proxies become 'small' proxies (fitting a cell)
		btAssert(0);
		return 0; //should never happen, but don't let the game crash ;-)
	}

	btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2]));

	// Construct the proxy in place inside the pool slot taken from the free list.
	const int slot = allocLargeHandle();
	btBroadphaseProxy* largeProxy = new (&m_pLargeHandles[slot])btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy);
	return largeProxy;
}
|
||||
|
||||
|
||||
|
||||
/// Destroys a proxy. Small proxies are handled by btSimpleBroadphase; a large
/// proxy is handed to freeLargeHandle() and all pairs containing it are then
/// removed from the pair cache.
void btGpu3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher)
{
	if (!isLargeProxy(proxy))
	{
		btSimpleBroadphase::destroyProxy(proxy, dispatcher);
		return;
	}

	btSimpleBroadphaseProxy* largeProxy = static_cast<btSimpleBroadphaseProxy*>(proxy);
	freeLargeHandle(largeProxy);
	m_pairCache->removeOverlappingPairsContainingProxy(proxy, dispatcher);
}
|
||||
|
||||
|
||||
|
||||
/// Re-initializes m_hPairBuffStartCurr to the layout set up by _initialize():
/// even entries hold the start of each body's fixed window of
/// m_maxPairsPerBody slots, odd entries are zeroed.
/// The dispatcher argument is not used by this implementation.
void btGpu3DGridBroadphase::resetPool(btDispatcher* dispatcher)
{
	m_hPairBuffStartCurr[0] = 0;
	m_hPairBuffStartCurr[1] = 0;
	for (int body = 1; body <= m_maxHandles; ++body)
	{
		const int startSlot = body * 2;
		m_hPairBuffStartCurr[startSlot] = m_hPairBuffStartCurr[startSlot - 2] + m_maxPairsPerBody;
		m_hPairBuffStartCurr[startSlot + 1] = 0;
	}
}
|
||||
|
||||
|
||||
|
||||
bool btGpu3DGridBroadphase::isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax)
|
||||
{
|
||||
btVector3 diag = aabbMax - aabbMin;
|
||||
///use the bounding sphere radius of this bounding box, to include rotation
|
||||
btScalar radius = diag.length() * btScalar(0.5f);
|
||||
return (radius > m_maxRadius);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Classifies an existing proxy by its uid: _initialize() numbers the
/// large-proxy pool starting at m_maxHandles + 2.
bool btGpu3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy)
{
	const int firstLargeUid = m_maxHandles + 2;
	return proxy->getUid() >= firstLargeUid;
}
|
||||
|
||||
|
||||
|
||||
/// Brute-force overlap test between all pairs of large-pool slots up to
/// m_LastLargeHandleIndex. Overlapping pairs missing from the cache are
/// added; cached pairs that no longer overlap are removed.
void btGpu3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher)
{
	if (m_numLargeHandles <= 0)
	{
		return;
	}

	int lastVisited = -1;
	for (int first = 0; first <= m_LastLargeHandleIndex; ++first)
	{
		btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[first];
		lastVisited = first;

		for (int second = first + 1; second <= m_LastLargeHandleIndex; ++second)
		{
			btSimpleBroadphaseProxy* proxy1 = &m_pLargeHandles[second];
			btAssert(proxy0 != proxy1);

			btSimpleBroadphaseProxy* simple0 = getSimpleProxyFromProxy(proxy0);
			btSimpleBroadphaseProxy* simple1 = getSimpleProxyFromProxy(proxy1);

			const bool overlapping = aabbOverlap(simple0, simple1);
			const bool cached = (m_pairCache->findPair(proxy0, proxy1) != 0);

			if (overlapping && !cached)
			{
				m_pairCache->addOverlappingPair(proxy0, proxy1);
			}
			else if (!overlapping && cached)
			{
				m_pairCache->removeOverlappingPair(proxy0, proxy1, dispatcher);
			}
		}
	}
	m_LastLargeHandleIndex = lastVisited;
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback)
|
||||
{
|
||||
btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback);
|
||||
for (int i=0; i <= m_LastLargeHandleIndex; i++)
|
||||
{
|
||||
btSimpleBroadphaseProxy* proxy = &m_pLargeHandles[i];
|
||||
rayCallback.process(proxy);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// overrides for CPU version
|
||||
//
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::prepareAABB()
|
||||
{
|
||||
BT_PROFILE("prepareAABB");
|
||||
bt3DGrid3F1U* pBB = m_hAABB;
|
||||
int i;
|
||||
int new_largest_index = -1;
|
||||
unsigned int num_small = 0;
|
||||
for(i = 0; i <= m_LastHandleIndex; i++)
|
||||
{
|
||||
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];
|
||||
new_largest_index = i;
|
||||
pBB->fx = proxy0->m_aabbMin.getX();
|
||||
pBB->fy = proxy0->m_aabbMin.getY();
|
||||
pBB->fz = proxy0->m_aabbMin.getZ();
|
||||
pBB->uw = i;
|
||||
pBB++;
|
||||
pBB->fx = proxy0->m_aabbMax.getX();
|
||||
pBB->fy = proxy0->m_aabbMax.getY();
|
||||
pBB->fz = proxy0->m_aabbMax.getZ();
|
||||
pBB->uw = num_small;
|
||||
pBB++;
|
||||
num_small++;
|
||||
}
|
||||
m_LastHandleIndex = new_largest_index;
|
||||
new_largest_index = -1;
|
||||
unsigned int num_large = 0;
|
||||
for(i = 0; i <= m_LastLargeHandleIndex; i++)
|
||||
{
|
||||
btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i];
|
||||
new_largest_index = i;
|
||||
pBB->fx = proxy0->m_aabbMin.getX();
|
||||
pBB->fy = proxy0->m_aabbMin.getY();
|
||||
pBB->fz = proxy0->m_aabbMin.getZ();
|
||||
pBB->uw = i + m_maxHandles;
|
||||
pBB++;
|
||||
pBB->fx = proxy0->m_aabbMax.getX();
|
||||
pBB->fy = proxy0->m_aabbMax.getY();
|
||||
pBB->fz = proxy0->m_aabbMax.getZ();
|
||||
pBB->uw = num_large + m_maxHandles;
|
||||
pBB++;
|
||||
num_large++;
|
||||
}
|
||||
m_LastLargeHandleIndex = new_largest_index;
|
||||
// paranoid checks
|
||||
btAssert(num_small == m_numHandles);
|
||||
btAssert(num_large == m_numLargeHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Host version: copies the parameters into the file-local
/// s3DGridBroadphaseParams.
void btGpu3DGridBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
{
	s3DGridBroadphaseParams = *hostParams;
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::calcHashAABB()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_calcHashAABB");
|
||||
btGpu_calcHashAABB(m_hAABB, m_hBodiesHash, m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::sortHash()
|
||||
{
|
||||
class bt3DGridHashKey
|
||||
{
|
||||
public:
|
||||
unsigned int hash;
|
||||
unsigned int index;
|
||||
void quickSort(bt3DGridHashKey* pData, int lo, int hi)
|
||||
{
|
||||
int i=lo, j=hi;
|
||||
bt3DGridHashKey x = pData[(lo+hi)/2];
|
||||
do
|
||||
{
|
||||
while(pData[i].hash > x.hash) i++;
|
||||
while(x.hash > pData[j].hash) j--;
|
||||
if(i <= j)
|
||||
{
|
||||
bt3DGridHashKey t = pData[i];
|
||||
pData[i] = pData[j];
|
||||
pData[j] = t;
|
||||
i++; j--;
|
||||
}
|
||||
} while(i <= j);
|
||||
if(lo < j) pData->quickSort(pData, lo, j);
|
||||
if(i < hi) pData->quickSort(pData, i, hi);
|
||||
}
|
||||
};
|
||||
BT_PROFILE("bt3DGrid_sortHash");
|
||||
bt3DGridHashKey* pHash = (bt3DGridHashKey*)m_hBodiesHash;
|
||||
pHash->quickSort(pHash, 0, m_numHandles - 1);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::findCellStart()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_findCellStart");
|
||||
btGpu_findCellStart(m_hBodiesHash, m_hCellStart, m_numHandles, m_params.m_numCells);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::findOverlappingPairs()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_findOverlappingPairs");
|
||||
btGpu_findOverlappingPairs(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::findPairsLarge()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_findPairsLarge");
|
||||
btGpu_findPairsLarge(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles, m_numLargeHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::computePairCacheChanges()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_computePairCacheChanges");
|
||||
btGpu_computePairCacheChanges(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, m_hAABB, m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::scanOverlappingPairBuff(bool copyToCpu)
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_scanOverlappingPairBuff");
|
||||
unsigned int sum = 0;
|
||||
m_hPairScanChanged[0]=0;
|
||||
for(int i = 0; i <= m_numHandles+1; i++)
|
||||
{
|
||||
unsigned int delta = m_hPairScanChanged[i];
|
||||
m_hPairScanChanged[i] = sum;
|
||||
sum += delta;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::squeezeOverlappingPairBuff()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff");
|
||||
//btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, m_hPairsChanged, m_hAABB, m_numHandles);
|
||||
btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, (unsigned int*)m_hAllOverlappingPairs, m_hAABB, m_numHandles);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#include "btGpu3DGridBroadphaseSharedCode.h"
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef BTGPU3DGRIDBROADPHASE_H
|
||||
#define BTGPU3DGRIDBROADPHASE_H
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
|
||||
|
||||
#include "btGpu3DGridBroadphaseSharedTypes.h"
|
||||
// Pair of two ints (x first, then y). Used in this header as the element type
// of the overlapping-pair array returned by btGpu3DGridBroadphase::getOverlap().
// NOTE: despite the name, both members are signed int.
struct MyUint2
{
	int x;	// first component
	int y;	// second component
};
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs
|
||||
|
||||
class btGpu3DGridBroadphase : public btSimpleBroadphase
|
||||
{
|
||||
protected:
|
||||
bool m_bInitialized;
|
||||
unsigned int m_numBodies;
|
||||
unsigned int m_numCells;
|
||||
unsigned int m_maxPairsPerBody;
|
||||
unsigned int m_maxBodiesPerCell;
|
||||
bt3DGridBroadphaseParams m_params;
|
||||
btScalar m_maxRadius;
|
||||
// CPU data
|
||||
unsigned int* m_hBodiesHash;
|
||||
unsigned int* m_hCellStart;
|
||||
unsigned int* m_hPairBuffStartCurr;
|
||||
bt3DGrid3F1U* m_hAABB;
|
||||
unsigned int* m_hPairBuff;
|
||||
unsigned int* m_hPairScanChanged;
|
||||
unsigned int* m_hPairsChanged;
|
||||
MyUint2* m_hAllOverlappingPairs;
|
||||
// large proxies
|
||||
int m_numLargeHandles;
|
||||
int m_maxLargeHandles;
|
||||
int m_LastLargeHandleIndex;
|
||||
btSimpleBroadphaseProxy* m_pLargeHandles;
|
||||
void* m_pLargeHandlesRawPtr;
|
||||
int m_firstFreeLargeHandle;
|
||||
int allocLargeHandle()
|
||||
{
|
||||
btAssert(m_numLargeHandles < m_maxLargeHandles);
|
||||
int freeLargeHandle = m_firstFreeLargeHandle;
|
||||
m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree();
|
||||
m_numLargeHandles++;
|
||||
if(freeLargeHandle > m_LastLargeHandleIndex)
|
||||
{
|
||||
m_LastLargeHandleIndex = freeLargeHandle;
|
||||
}
|
||||
return freeLargeHandle;
|
||||
}
|
||||
void freeLargeHandle(btSimpleBroadphaseProxy* proxy)
|
||||
{
|
||||
int handle = int(proxy - m_pLargeHandles);
|
||||
btAssert((handle >= 0) && (handle < m_maxHandles));
|
||||
if(handle == m_LastLargeHandleIndex)
|
||||
{
|
||||
m_LastLargeHandleIndex--;
|
||||
}
|
||||
proxy->SetNextFree(m_firstFreeLargeHandle);
|
||||
m_firstFreeLargeHandle = handle;
|
||||
proxy->m_clientObject = 0;
|
||||
m_numLargeHandles--;
|
||||
}
|
||||
bool isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax);
|
||||
bool isLargeProxy(btBroadphaseProxy* proxy);
|
||||
// debug
|
||||
unsigned int m_numPairsAdded;
|
||||
unsigned int m_numPairsRemoved;
|
||||
unsigned int m_numOverflows;
|
||||
//
|
||||
public:
|
||||
virtual int getNumOverlap()
|
||||
{
|
||||
return m_hPairScanChanged[m_numHandles+1];
|
||||
}
|
||||
virtual MyUint2* getOverlap()
|
||||
{
|
||||
return m_hAllOverlappingPairs;
|
||||
}
|
||||
// NOTE : for better results gridSizeX, gridSizeY and gridSizeZ should be powers of 2
|
||||
btGpu3DGridBroadphase(const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxBodiesPerCell = 8);
|
||||
btGpu3DGridBroadphase( btOverlappingPairCache* overlappingPairCache,
|
||||
const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxBodiesPerCell = 8);
|
||||
virtual ~btGpu3DGridBroadphase();
|
||||
virtual void calculateOverlappingPairs(btDispatcher* dispatcher);
|
||||
|
||||
virtual btBroadphaseProxy* createProxy(const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
|
||||
virtual void destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
|
||||
virtual void rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
|
||||
virtual void resetPool(btDispatcher* dispatcher);
|
||||
|
||||
static int getFloorPowOfTwo(int val); // returns 2^n : 2^(n+1) > val >= 2^n
|
||||
|
||||
protected:
|
||||
void _initialize( const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxBodiesPerCell);
|
||||
void _finalize();
|
||||
void addPairsToCache(btDispatcher* dispatcher);
|
||||
void addLarge2LargePairsToCache(btDispatcher* dispatcher);
|
||||
|
||||
// overrides for CPU version
|
||||
virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
|
||||
virtual void prepareAABB();
|
||||
virtual void calcHashAABB();
|
||||
virtual void sortHash();
|
||||
virtual void findCellStart();
|
||||
virtual void findOverlappingPairs();
|
||||
virtual void findPairsLarge();
|
||||
virtual void computePairCacheChanges();
|
||||
virtual void scanOverlappingPairBuff(bool copyToCpu=true);
|
||||
virtual void squeezeOverlappingPairBuff();
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#endif //BTGPU3DGRIDBROADPHASE_H
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
@@ -0,0 +1,428 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
// K E R N E L F U N C T I O N S
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// calculate position in uniform grid
|
||||
BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
|
||||
{
|
||||
int3 gridPos;
|
||||
gridPos.x = (int)floor(p.x * BT_GPU_params.m_invCellSizeX) & (BT_GPU_params.m_gridSizeX - 1);
|
||||
gridPos.y = (int)floor(p.y * BT_GPU_params.m_invCellSizeY) & (BT_GPU_params.m_gridSizeY - 1);
|
||||
gridPos.z = (int)floor(p.z * BT_GPU_params.m_invCellSizeZ) & (BT_GPU_params.m_gridSizeZ - 1);
|
||||
return gridPos;
|
||||
} // bt3DGrid_calcGridPos()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// calculate address in grid from position (clamping to edges)
|
||||
BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
|
||||
{
|
||||
gridPos.x &= (BT_GPU_params.m_gridSizeX - 1);
|
||||
gridPos.y &= (BT_GPU_params.m_gridSizeY - 1);
|
||||
gridPos.z &= (BT_GPU_params.m_gridSizeZ - 1);
|
||||
return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x;
|
||||
} // bt3DGrid_calcGridHash()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// calculate grid hash value for each body using its AABB
|
||||
BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
|
||||
{
|
||||
int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
|
||||
if(index >= (int)numBodies)
|
||||
{
|
||||
return;
|
||||
}
|
||||
bt3DGrid3F1U bbMin = pAABB[index*2];
|
||||
bt3DGrid3F1U bbMax = pAABB[index*2 + 1];
|
||||
float4 pos;
|
||||
pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
|
||||
pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
|
||||
pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
|
||||
// get address in grid
|
||||
int3 gridPos = bt3DGrid_calcGridPos(pos);
|
||||
uint gridHash = bt3DGrid_calcGridHash(gridPos);
|
||||
// store grid hash and body index
|
||||
pHash[index] = BT_GPU_make_uint2(gridHash, index);
|
||||
} // calcHashAABBD()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Kernel: one thread per entry of pHash. An entry whose hash differs from the
// hash of the entry before it (or entry 0) is the first of its cell, and its
// index is written to cellStart[hash].
// Threads whose index is >= numBodies do nothing.
BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
{
	int entryIdx = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(entryIdx >= (int)numBodies) return;

	uint2 entry = pHash[entryIdx];

	// Hashes are staged in a shared array shifted by one slot, so that
	// sharedHash[threadIdx.x] holds the hash of the preceding entry and each
	// thread only has to load its own hash.
	BT_GPU___shared__ uint sharedHash[257];
	sharedHash[BT_GPU_threadIdx.x+1] = entry.x;
	if((entryIdx > 0) && (BT_GPU_threadIdx.x == 0))
	{
		// the first thread of a block has no in-block predecessor: fetch it explicitly
		volatile uint2 prevEntry = pHash[entryIdx-1];
		sharedHash[0] = prevEntry.x;
	}

	BT_GPU___syncthreads();

	const bool startsCell = (entryIdx == 0) || (entry.x != sharedHash[BT_GPU_threadIdx.x]);
	if(startsCell)
	{
		cellStart[entry.x] = entryIdx;
	}
} // findCellStartD()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Returns non-zero when the box [min0, max0] and the box [min1, max1] overlap
// on all three axes (touching faces count as overlap). Only the fx/fy/fz
// members are compared.
BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
{
	// The comparisons are kept in "<=" form so that a NaN coordinate still yields "no overlap".
	const bool overlapX = (min0.fx <= max1.fx) && (min1.fx <= max0.fx);
	const bool overlapY = (min0.fy <= max1.fy) && (min1.fy <= max0.fy);
	const bool overlapZ = (min0.fz <= max1.fz) && (min1.fz <= max0.fz);
	return overlapX && overlapY && overlapZ;
} // cudaTestAABBOverlap()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Tests the body stored at pHash[index] against the bodies of one grid cell.
//
// The cell's entries are the consecutive pHash entries starting at
// pCellStart[hash] that carry the same hash; at most
// BT_GPU_params.m_maxBodiesPerCell of them are visited. Only partners with a
// smaller unsorted index are tested, which also excludes the body itself.
//
// For every overlapping partner the body's slice of pPairBuff is searched:
//  - partner already listed  -> its entry gets BT_3DGRID_PAIR_FOUND_FLG
//  - partner not listed      -> appended with BT_3DGRID_PAIR_NEW_FLG, unless
//                               the slice is full, in which case the scan stops.
// The slice of a body starts at pPairBuffStartCurr[handle].x and currently
// holds pPairBuffStartCurr[handle].y entries; the updated count is written back.
BT_GPU___device__ void findPairsInCell(	int3 gridPos,
										uint index,
										uint2* pHash,
										uint* pCellStart,
										bt3DGrid3F1U* pAABB,
										uint* pPairBuff,
										uint2* pPairBuffStartCurr,
										uint numBodies)
{
	const uint cellHash = bt3DGrid_calcGridHash(gridPos);

	// 0xffffffff marks a cell without bodies
	const uint firstInCell = pCellStart[cellHash];
	if(firstInCell == 0xffffffff) return;

	// this thread's body and its bounding box
	const uint bodyA = pHash[index].y;
	bt3DGrid3F1U minA = BT_GPU_FETCH(pAABB, bodyA*2);
	bt3DGrid3F1U maxA = BT_GPU_FETCH(pAABB, bodyA*2 + 1);
	const uint handleA = minA.uw;

	// slice of the pair buffer owned by this body
	const uint2 sliceA = pPairBuffStartCurr[handleA];
	const uint2 sliceNext = pPairBuffStartCurr[handleA+1];
	const uint start = sliceA.x;
	uint curr = sliceA.y;
	const uint curr_max = sliceNext.x - start - 1;

	// never look further than m_maxBodiesPerCell entries, nor past the end of pHash
	uint lastInCell = firstInCell + BT_GPU_params.m_maxBodiesPerCell;
	if(lastInCell > numBodies)
	{
		lastInCell = numBodies;
	}

	for(uint slot = firstInCell; slot < lastInCell; slot++)
	{
		const uint2 other = pHash[slot];
		if(other.x != cellHash)
		{
			break; // no longer in same bucket
		}
		const uint bodyB = other.y;
		if(bodyB >= bodyA)
		{
			continue; // skips the body itself and partners with a larger index
		}
		bt3DGrid3F1U minB = BT_GPU_FETCH(pAABB, bodyB*2);
		bt3DGrid3F1U maxB = BT_GPU_FETCH(pAABB, bodyB*2 + 1);
		if(!cudaTestAABBOverlap(minA, maxA, minB, maxB))
		{
			continue;
		}
		const uint handleB = minB.uw;

		// look for the partner among the entries already stored (flags ignored)
		uint k = 0;
		while((k < curr) && ((pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG)) != handleB))
		{
			k++;
		}
		if(k < curr)
		{
			pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
			continue;
		}
		if(curr >= curr_max)
		{ // not a good solution, but let's avoid crash
			break;
		}
		pPairBuff[start+curr] = handleB | BT_3DGRID_PAIR_NEW_FLG;
		curr++;
	}

	pPairBuffStartCurr[handleA] = BT_GPU_make_uint2(start, curr);
} // findPairsInCell()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Kernel: one thread per entry of pHash. Locates the grid cell holding the
// centre of the body's AABB and runs findPairsInCell() on that cell and its 26
// neighbours.
// Threads whose index is >= numBodies do nothing.
BT_GPU___global__ void findOverlappingPairsD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart,
												uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies) return;

	const uint bodyIdx = pHash[index].y;
	bt3DGrid3F1U lower = BT_GPU_FETCH(pAABB, bodyIdx*2);
	bt3DGrid3F1U upper = BT_GPU_FETCH(pAABB, bodyIdx*2 + 1);

	// AABB centre; w is left unset and is not read by bt3DGrid_calcGridPos()
	float4 center;
	center.x = (lower.fx + upper.fx) * 0.5f;
	center.y = (lower.fy + upper.fy) * 0.5f;
	center.z = (lower.fz + upper.fz) * 0.5f;

	int3 homeCell = bt3DGrid_calcGridPos(center);

	// Visit the 3x3x3 block of cells around the home cell. The offsets are
	// decoded so that x varies fastest, then y, then z.
	for(int n = 0; n < 27; n++)
	{
		const int dx = (n % 3) - 1;
		const int dy = ((n / 3) % 3) - 1;
		const int dz = (n / 9) - 1;
		findPairsInCell(homeCell + BT_GPU_make_int3(dx, dy, dz), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
	}
} // findOverlappingPairsD()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Kernel: one thread per entry of pHash. Tests the body's AABB against the
// numLarge boxes stored in pAABB directly after the first numBodies bodies
// (body numBodies + i occupies pAABB[2*(numBodies+i)] and the entry after it).
//
// For every overlapping large body the thread's slice of pPairBuff is searched:
//  - partner already listed  -> its entry gets BT_3DGRID_PAIR_FOUND_FLG
//  - partner not listed      -> appended with BT_3DGRID_PAIR_NEW_FLG, unless
//                               the slice is full, in which case the scan stops
//                               without storing anything.
// The slice starts at pPairBuffStartCurr[handle].x and currently holds
// pPairBuffStartCurr[handle].y entries; the updated count is written back.
// pCellStart is not used by this kernel.
// Threads whose index is >= numBodies do nothing.
BT_GPU___global__ void findPairsLargeD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff,
										uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies)
	{
		return;
	}
	uint2 sortedData = pHash[index];
	uint unsorted_indx = sortedData.y;
	bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
	uint handleIndex = min0.uw;
	uint2 start_curr = pPairBuffStartCurr[handleIndex];
	uint start = start_curr.x;
	uint curr = start_curr.y;
	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
	uint curr_max = start_curr_next.x - start - 1;
	for(uint i = 0; i < numLarge; i++)
	{
		uint indx2 = numBodies + i;
		bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
		bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
		if(cudaTestAABBOverlap(min0, max0, min1, max1))
		{
			uint k;
			uint handleIndex2 = min1.uw;
			// look for the partner among the entries already stored (flags ignored)
			for(k = 0; k < curr; k++)
			{
				uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
				if(old_pair == handleIndex2)
				{
					pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
					break;
				}
			}
			if(k == curr)
			{
				// Check the capacity BEFORE storing, as findPairsInCell() does,
				// so that a full slice is never written to.
				if(curr >= curr_max)
				{ // not a good solution, but let's avoid crash
					break;
				}
				pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
				curr++;
			}
		}
	}
	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
	return;
} // findPairsLargeD()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Kernel: one thread per body. Counts the entries in the body's slice of
// pPairBuff that carry any of the BT_3DGRID_PAIR_ANY_FLG bits and stores the
// count in pPairScan[index + 1] (slot 0 is not written here).
// The body's handle is read from the uw member of its min-corner entry, pAABB[2*index].
// Threads whose index is >= numBodies do nothing.
BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr,
												uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies) return;

	const uint handle = pAABB[index * 2].uw;
	const uint2 slice = pPairBuffStartCurr[handle];
	const uint start = slice.x;	// first entry of the slice
	const uint count = slice.y;	// number of entries currently stored

	uint flagged = 0;
	for(uint k = 0; k < count; k++)
	{
		if(pPairBuff[start + k] & BT_3DGRID_PAIR_ANY_FLG)
		{
			flagged++;
		}
	}
	pPairScan[index+1] = flagged;
} // computePairCacheChangesD()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Kernel: one thread per body. Walks the body's slice of pPairBuff and, for
// every entry carrying any BT_3DGRID_PAIR_ANY_FLG bit:
//  - emits (handle, partner) to pPairOut, starting at offset pPairScan[index+1];
//  - writes the partner, with the flag bits cleared, back to the front of the slice.
// Entries without a flag are dropped. The slice's entry count in
// pPairBuffStartCurr is then replaced by the number of entries kept.
// The body's handle is read from the uw member of its min-corner entry, pAABB[2*index].
// Threads whose index is >= numBodies do nothing.
BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
												   uint2* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies) return;

	const uint handle = pAABB[index * 2].uw;
	const uint2 slice = pPairBuffStartCurr[handle];
	const uint start = slice.x;
	const uint count = slice.y;

	uint* pSlice = pPairBuff + start;
	uint2* pOut = pPairOut + pPairScan[index+1];
	uint kept = 0;
	for(uint k = 0; k < count; k++)
	{
		if(pSlice[k] & BT_3DGRID_PAIR_ANY_FLG)
		{
			// report the pair
			pOut->x = handle;
			pOut->y = pSlice[k] & (~BT_3DGRID_PAIR_ANY_FLG);
			pOut++;
			// compact in place; kept <= k, so unread entries are never overwritten
			pSlice[kept] = pSlice[k] & (~BT_3DGRID_PAIR_ANY_FLG);
			kept++;
		}
	}
	pPairBuffStartCurr[handle] = BT_GPU_make_uint2(start, kept);
} // squeezeOverlappingPairBuffD()
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
// E N D O F K E R N E L F U N C T I O N S
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host entry point: runs the calcHashAABBD kernel over numBodies bodies.
// The launch configuration comes from computeGridSize() with a block size of
// 256; hash is handed to the kernel as a uint2 array.
void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies)
{
	int numThreads;
	int numBlocks;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
	// launch, then report any error raised by the launch
	BT_GPU_EXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies));
	BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
} // calcHashAABB()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host entry point: fills the first numCells entries of cellStart with
// 0xffffffff bytes (the "empty cell" marker tested by findPairsInCell) and then
// runs the findCellStartD kernel over numBodies hash entries.
// The launch configuration comes from computeGridSize() with a block size of 256.
void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells)
{
	int numThreads;
	int numBlocks;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
	BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));
	BT_GPU_EXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
} // findCellStart()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host entry point: runs the findOverlappingPairsD kernel over numBodies bodies.
// The launch configuration comes from computeGridSize() with a block size of 64.
// When B_CUDA_USE_TEX is set, pAABB is bound to the pAABBTex texture for the
// duration of the launch.
void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies)
{
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
#endif
	int numThreads;
	int numBlocks;
	BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
	BT_GPU_EXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
} // findOverlappingPairs()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host entry point: runs the findPairsLargeD kernel over numBodies bodies,
// passing numLarge through to it.
// The launch configuration comes from computeGridSize() with a block size of 64.
// When B_CUDA_USE_TEX is set, the first (numBodies + numLarge) * 2 entries of
// pAABB are bound to the pAABBTex texture for the duration of the launch.
void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge)
{
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
#endif
	int numThreads;
	int numBlocks;
	BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
	BT_GPU_EXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
} // findPairsLarge()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host entry point: runs the computePairCacheChangesD kernel over numBodies bodies.
// The launch configuration comes from computeGridSize() with a block size of
// 256; pPairBuffStartCurr is handed to the kernel as a uint2 array.
void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies)
{
	int numThreads;
	int numBlocks;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
	BT_GPU_EXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
} // computePairCacheChanges()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host entry point: runs the squeezeOverlappingPairBuffD kernel over numBodies bodies.
// The launch configuration comes from computeGridSize() with a block size of
// 256; pPairBuffStartCurr and pPairOut are handed to the kernel as uint2 arrays.
void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies)
{
	int numThreads;
	int numBlocks;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
	BT_GPU_EXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint2*)pPairOut,pAABB,numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
} // squeezeOverlappingPairBuff()
|
||||
|
||||
//------------------------------------------------------------------------------------------------
|
||||
|
||||
} // extern "C"
|
||||
|
||||
//------------------------------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------------------------------
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Shared definitions for GPU-based 3D Grid collision detection broadphase
|
||||
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
// Keep this file free from Bullet headers
|
||||
// it is included into both CUDA and CPU code
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H
|
||||
#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "btGpu3DGridBroadphaseSharedTypes.h"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies);
|
||||
|
||||
void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
|
||||
|
||||
void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies);
|
||||
|
||||
void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge);
|
||||
|
||||
void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies);
|
||||
|
||||
void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies);
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
} // extern "C"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Shared definitions for GPU-based 3D Grid collision detection broadphase
|
||||
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
// Keep this file free from Bullet headers
|
||||
// it is included into both CUDA and CPU code
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H
|
||||
#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Status bits for entries of the 3D grid broadphase pair buffer.
// NOTE(review): presumably OR-ed into the upper bits of a stored pair index - confirm against the kernels.
#define BT_3DGRID_PAIR_FOUND_FLG	(0x40000000)
#define BT_3DGRID_PAIR_NEW_FLG		(0x20000000)
// Mask that selects both status bits at once.
#define BT_3DGRID_PAIR_ANY_FLG		(BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG)
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Parameter block of the 3D grid broadphase.
// This header is included into both CUDA and CPU code, so the member order
// and types must not be changed.
struct bt3DGridBroadphaseParams
{
	// grid dimensions, one per axis (per the member names)
	unsigned int	m_gridSizeX;
	unsigned int	m_gridSizeY;
	unsigned int	m_gridSizeZ;
	unsigned int	m_numCells;
	// NOTE(review): presumably 1 / cell extent along each axis - confirm against the host setup code
	float			m_invCellSizeX;
	float			m_invCellSizeY;
	float			m_invCellSizeZ;
	unsigned int	m_numBodies;
	unsigned int	m_maxBodiesPerCell;
};
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Packed record of three floats followed by one unsigned int ("3F1U").
// Shared between CUDA and CPU code: keep the member order as is.
struct bt3DGrid3F1U
{
	float			fx;
	float			fy;
	float			fz;
	unsigned int	uw;
};
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
// definitions for "GPU on CPU" code
|
||||
|
||||
|
||||
#ifndef BT_GPU_DEFINES_H
|
||||
#define BT_GPU_DEFINES_H
|
||||
|
||||
// Plain-old-data stand-ins for the vector types used by the "GPU on CPU" code.
typedef unsigned int uint;

// two signed integers
struct int2
{
	int x;
	int y;
};

// two unsigned integers
struct uint2
{
	unsigned int x;
	unsigned int y;
};

// three signed integers
struct int3
{
	int x;
	int y;
	int z;
};

// three unsigned integers
struct uint3
{
	unsigned int x;
	unsigned int y;
	unsigned int z;
};

// four floats
struct float4
{
	float x;
	float y;
	float z;
	float w;
};

// three floats
struct float3
{
	float x;
	float y;
	float z;
};
|
||||
|
||||
|
||||
// --- function / storage qualifiers, mapped onto plain C++ for the CPU build ---
#define BT_GPU___device__		inline
#define BT_GPU___global__		inline
#define BT_GPU___shared__		static
#define BT_GPU___devdata__
#define BT_GPU___constant__

// --- arithmetic helpers (function-like macros: arguments may be evaluated twice) ---
#define BT_GPU_max(a, b)		((a) > (b) ? (a) : (b))
#define BT_GPU_min(a, b)		((a) < (b) ? (a) : (b))
#define BT_GPU___mul24(a, b)	((a)*(b))

// --- miscellaneous ---
// name of the parameter block symbol referenced through BT_GPU_params
#define BT_GPU_params			s3DGridBroadphaseParams
// expands to nothing in the CPU build
#define BT_GPU___syncthreads()
#define CUDART_PI_F				SIMD_PI
|
||||
|
||||
static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y)
|
||||
{
|
||||
uint2 t; t.x = x; t.y = y; return t;
|
||||
}
|
||||
#define BT_GPU_make_uint2(x, y) bt3dGrid_make_uint2(x, y)
|
||||
|
||||
static inline int3 bt3dGrid_make_int3(int x, int y, int z)
|
||||
{
|
||||
int3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||
}
|
||||
#define BT_GPU_make_int3(x, y, z) bt3dGrid_make_int3(x, y, z)
|
||||
|
||||
static inline float3 bt3dGrid_make_float3(float x, float y, float z)
|
||||
{
|
||||
float3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||
}
|
||||
#define BT_GPU_make_float3(x, y, z) bt3dGrid_make_float3(x, y, z)
|
||||
|
||||
static inline float3 bt3dGrid_make_float34(float4 f)
|
||||
{
|
||||
float3 t; t.x = f.x; t.y = f.y; t.z = f.z; return t;
|
||||
}
|
||||
#define BT_GPU_make_float34(f) bt3dGrid_make_float34(f)
|
||||
|
||||
static inline float3 bt3dGrid_make_float31(float f)
|
||||
{
|
||||
float3 t; t.x = t.y = t.z = f; return t;
|
||||
}
|
||||
#define BT_GPU_make_float31(x) bt3dGrid_make_float31(x)
|
||||
|
||||
static inline float4 bt3dGrid_make_float42(float3 v, float f)
|
||||
{
|
||||
float4 t; t.x = v.x; t.y = v.y; t.z = v.z; t.w = f; return t;
|
||||
}
|
||||
#define BT_GPU_make_float42(a, b) bt3dGrid_make_float42(a, b)
|
||||
|
||||
static inline float4 bt3dGrid_make_float44(float a, float b, float c, float d)
|
||||
{
|
||||
float4 t; t.x = a; t.y = b; t.z = c; t.w = d; return t;
|
||||
}
|
||||
#define BT_GPU_make_float44(a, b, c, d) bt3dGrid_make_float44(a, b, c, d)
|
||||
|
||||
// component-wise sum of two int3 values
inline int3 operator+(int3 a, int3 b)
{
	int3 sum;
	sum.x = a.x + b.x;
	sum.y = a.y + b.y;
	sum.z = a.z + b.z;
	return sum;
}
|
||||
|
||||
// component-wise sum of two float4 values
inline float4 operator+(const float4& a, const float4& b)
{
	float4 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; r.w = a.w+b.w; return r;
}

// scale every component of a float4 by fact
inline float4 operator*(const float4& a, float fact)
{
	float4 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; r.w = a.w*fact; return r;
}

// scalar-on-the-left form of the scaling operator.
// The vector is taken by const reference (it is only read), so const objects
// and temporaries are accepted just like in the vector-on-the-left overload.
inline float4 operator*(float fact, const float4& a)
{
	return (a * fact);
}

// in-place scaling; returns the modified operand
inline float4& operator*=(float4& a, float fact)
{
	a = fact * a;
	return a;
}

// in-place component-wise addition; returns the modified operand
inline float4& operator+=(float4& a, const float4& b)
{
	a = a + b;
	return a;
}
|
||||
|
||||
// component-wise sum of two float3 values
inline float3 operator+(const float3& a, const float3& b)
{
	float3 sum = { a.x+b.x, a.y+b.y, a.z+b.z };
	return sum;
}

// component-wise difference of two float3 values
inline float3 operator-(const float3& a, const float3& b)
{
	float3 diff = { a.x-b.x, a.y-b.y, a.z-b.z };
	return diff;
}
|
||||
static inline float bt3dGrid_dot(float3& a, float3& b)
|
||||
{
|
||||
return a.x*b.x+a.y*b.y+a.z*b.z;
|
||||
}
|
||||
#define BT_GPU_dot(a,b) bt3dGrid_dot(a,b)
|
||||
|
||||
static inline float bt3dGrid_dot4(float4& a, float4& b)
|
||||
{
|
||||
return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
|
||||
}
|
||||
#define BT_GPU_dot4(a,b) bt3dGrid_dot4(a,b)
|
||||
|
||||
// cross product a x b of two float3 values
static inline float3 bt3dGrid_cross(const float3& a, const float3& b)
{
	float3 product =
	{
		a.y*b.z-a.z*b.y,
		-a.x*b.z+a.z*b.x,
		a.x*b.y-a.y*b.x
	};
	return product;
}
#define BT_GPU_cross(a,b) bt3dGrid_cross(a,b)
|
||||
|
||||
|
||||
// scale every component of a float3 by fact
inline float3 operator*(const float3& a, float fact)
{
	float3 scaled = { a.x*fact, a.y*fact, a.z*fact };
	return scaled;
}

// in-place component-wise addition; returns the modified operand
inline float3& operator+=(float3& a, const float3& b)
{
	a.x = a.x+b.x;
	a.y = a.y+b.y;
	a.z = a.z+b.z;
	return a;
}

// in-place component-wise subtraction; returns the modified operand
inline float3& operator-=(float3& a, const float3& b)
{
	a.x = a.x-b.x;
	a.y = a.y-b.y;
	a.z = a.z-b.z;
	return a;
}

// in-place scaling; returns the modified operand
inline float3& operator*=(float3& a, float fact)
{
	a.x = a.x*fact;
	a.y = a.y*fact;
	a.z = a.z*fact;
	return a;
}

// unary minus: negates every component
inline float3 operator-(const float3& v)
{
	float3 negated = { -v.x, -v.y, -v.z };
	return negated;
}
|
||||
|
||||
|
||||
// --- data access: in the CPU build a "fetch" is a plain array index ---
#define BT_GPU_FETCH(a, b)				a[b]
#define BT_GPU_FETCH4(a, b)				a[b]

// --- naming / call wrappers ---
// prefixes a function name with btGpu_
#define BT_GPU_PREF(func)				btGpu_##func
// passes the wrapped expression through unchanged
#define BT_GPU_SAFE_CALL(func)			func

// --- memory helpers mapped onto the C library ---
#define BT_GPU_Memset					memset
// copies c bytes from b into the object a
#define BT_GPU_MemcpyToSymbol(a, b, c)	memcpy(&a, b, c)

// --- texture binding expands to nothing in the CPU build ---
#define BT_GPU_BindTexture(a, b, c, d)
#define BT_GPU_UnbindTexture(a)
|
||||
|
||||
// Emulated launch coordinates for the CPU build. Being static in a header,
// every translation unit that includes this file gets its own copy.
static uint2 s_blockIdx, s_blockDim, s_threadIdx;
#define BT_GPU_blockIdx s_blockIdx
#define BT_GPU_blockDim s_blockDim
#define BT_GPU_threadIdx s_threadIdx

// Runs kfunc once per (block, thread) pair, serially: blocks 0..numb-1 in the
// outer loop, threads 0..numt-1 in the inner loop, updating the .x members of
// the emulated indices before each call. args is the parenthesised argument list.
// Note: numb and numt are evaluated repeatedly by the loop conditions.
#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) \
{ \
	s_blockDim.x=numt; \
	for(int nb=0;nb<numb;nb++) \
	{ \
		s_blockIdx.x=nb; \
		for(int nt=0;nt<numt;nt++) \
		{ \
			s_threadIdx.x=nt; \
			kfunc args; \
		} \
	} \
}

// error checking expands to nothing in the CPU build
#define BT_GPU_CHECK_ERROR(s)
|
||||
|
||||
|
||||
#endif //BT_GPU_DEFINES_H
|
||||
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Shared code for GPU-based utilities
|
||||
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
// Keep this file free from Bullet headers
|
||||
// will be compiled by both CPU and CUDA compilers
|
||||
// file with definitions of BT_GPU_xxx should be included first
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "btGpuUtilsSharedDefs.h"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
//Round a / b to nearest higher integer value
|
||||
int BT_GPU_PREF(iDivUp)(int a, int b)
|
||||
{
|
||||
return (a % b != 0) ? (a / b + 1) : (a / b);
|
||||
} // iDivUp()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// compute grid and thread block size for a given number of elements
|
||||
void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads)
|
||||
{
|
||||
numThreads = BT_GPU_min(blockSize, n);
|
||||
numBlocks = BT_GPU_PREF(iDivUp)(n, numThreads);
|
||||
} // computeGridSize()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
// Shared definitions for GPU-based utilities
|
||||
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
// Keep this file free from Bullet headers
|
||||
// it is included into both CUDA and CPU code
|
||||
// file with definitions of BT_GPU_xxx should be included first
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
|
||||
#ifndef BTGPUUTILSDHAREDDEFS_H
|
||||
#define BTGPUUTILSDHAREDDEFS_H
|
||||
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
|
||||
//Round a / b to nearest higher integer value
|
||||
int BT_GPU_PREF(iDivUp)(int a, int b);
|
||||
|
||||
// compute grid and thread block size for a given number of elements
|
||||
void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads);
|
||||
|
||||
void BT_GPU_PREF(allocateArray)(void** devPtr, unsigned int size);
|
||||
void BT_GPU_PREF(freeArray)(void* devPtr);
|
||||
void BT_GPU_PREF(copyArrayFromDevice)(void* host, const void* device, unsigned int size);
|
||||
void BT_GPU_PREF(copyArrayToDevice)(void* device, const void* host, unsigned int size);
|
||||
void BT_GPU_PREF(registerGLBufferObject(unsigned int vbo));
|
||||
void* BT_GPU_PREF(mapGLBufferObject(unsigned int vbo));
|
||||
void BT_GPU_PREF(unmapGLBufferObject(unsigned int vbo));
|
||||
|
||||
|
||||
} // extern "C"
|
||||
|
||||
|
||||
#endif // BTGPUUTILSDHAREDDEFS_H
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
|
||||
-- Only the AMD variant is built; the Intel and NVIDIA variants are disabled.
include("AMD")
-- include "Intel"
-- include "NVIDIA"
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
|
||||
-- Console application built from main.cpp and btOpenCLUtils, AMD OpenCL SDK flavour.
-- The project is only defined when findOpenCL_AMD() reports an SDK.
hasCL = findOpenCL_AMD()

if hasCL then

	project("OpenCL_intialize_AMD")

	initOpenCL_AMD()

	language("C++")

	kind("ConsoleApp")
	targetdir("../../../bin")

--	includedirs {"..","../../../../include/gpu_research"}

	files({
		"../main.cpp",
		"../btOpenCLUtils.cpp",
		"../btOpenCLUtils.h"
	})

end
|
||||
@@ -0,0 +1,23 @@
|
||||
|
||||
-- Console application built from main.cpp and btOpenCLUtils, Intel OpenCL SDK flavour.
-- The project is only defined when findOpenCL_Intel() reports an SDK.
hasCL = findOpenCL_Intel()

if hasCL then

	project("OpenCL_intialize_Intel")

	initOpenCL_Intel()

	language("C++")

	kind("ConsoleApp")
	targetdir("../../../bin")

--	includedirs {"..","../../../../include/gpu_research"}

	files({
		"../main.cpp",
		"../btOpenCLUtils.cpp",
		"../btOpenCLUtils.h"
	})

end
|
||||
@@ -0,0 +1,23 @@
|
||||
|
||||
-- Console application built from main.cpp and btOpenCLUtils, NVIDIA OpenCL SDK flavour.
-- The project is only defined when findOpenCL_NVIDIA() reports an SDK.
hasCL = findOpenCL_NVIDIA()

if hasCL then

	project("OpenCL_intialize_NVIDIA")

	initOpenCL_NVIDIA()

	language("C++")

	kind("ConsoleApp")
	targetdir("../../../bin")

--	includedirs {"..","../../../../include/gpu_research"}

	files({
		"../main.cpp",
		"../btOpenCLUtils.cpp",
		"../btOpenCLUtils.h"
	})

end
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef BT_OPENCL_INCLUDE_H
|
||||
#define BT_OPENCL_INCLUDE_H
|
||||
|
||||
|
||||
#ifdef __APPLE__
|
||||
#ifdef USE_MINICL
|
||||
#include <MiniCL/cl.h>
|
||||
#else
|
||||
#include <OpenCL/cl.h>
|
||||
#endif
|
||||
#else
|
||||
#ifdef USE_MINICL
|
||||
#include <MiniCL/cl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#ifdef _WIN32
|
||||
#include "CL/cl_gl.h"
|
||||
#endif //_WIN32
|
||||
#endif
|
||||
#endif //__APPLE__
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
// Prints the status code a and asserts when it differs from the expected value b.
// The body is wrapped in do { } while (0) so the macro behaves as one statement:
// it can be used as the branch of an if/else without capturing the caller's else.
// Invoke it with a trailing semicolon. Note that a and b are evaluated more than once.
#define oclCHECKERROR(a, b) do { if((a)!=(b)) { printf("OCL Error : %d\n", (a)); assert((a) == (b)); } } while (0)
|
||||
|
||||
|
||||
#endif //BT_OPENCL_INCLUDE_H
|
||||
|
||||
@@ -0,0 +1,731 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//original author: Roman Ponomarev
|
||||
//cleanup by Erwin Coumans
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "btOpenCLUtils.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define BT_MAX_CL_DEVICES 16 //who needs 16 devices?
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define btAssert assert
|
||||
#endif
|
||||
|
||||
//Set the preferred platform vendor using the OpenCL SDK
// The vendor name is selected at compile time from the CL_PLATFORM_* define
// of the build. It points at a string literal, hence const char*.
static const char* spPlatformVendor =
#if defined(CL_PLATFORM_MINI_CL)
	"MiniCL, SCEA";
#elif defined(CL_PLATFORM_AMD)
	"Advanced Micro Devices, Inc.";
#elif defined(CL_PLATFORM_NVIDIA)
	"NVIDIA Corporation";
#elif defined(CL_PLATFORM_INTEL)
	"Intel(R) Corporation";
#else
	"Unknown Vendor";
#endif
|
||||
|
||||
#ifndef CL_PLATFORM_MINI_CL
|
||||
#ifdef _WIN32
|
||||
#include "CL/cl_gl.h"
|
||||
#endif //_WIN32
|
||||
#endif
|
||||
|
||||
// Returns the number of OpenCL platforms reported by clGetPlatformIDs.
// pErrNum (optional) receives the status code only when the query fails.
int btOpenCLUtils::getNumPlatforms(cl_int* pErrNum)
{
	cl_uint platformCount = 0;
	const cl_int status = clGetPlatformIDs(0, NULL, &platformCount);

	const bool failed = (status != CL_SUCCESS);
	if (failed && pErrNum != NULL)
	{
		*pErrNum = status;
	}
	return platformCount;
}
|
||||
|
||||
const char* btOpenCLUtils::getSdkVendorName()
|
||||
{
|
||||
return spPlatformVendor;
|
||||
}
|
||||
|
||||
// Returns the platform with the given index, or 0 when the index is out of
// range or a query fails.
//   platformIndex - zero-based index into the list returned by clGetPlatformIDs
//   pErrNum       - optional; receives the status code when a query fails
cl_platform_id btOpenCLUtils::getPlatform(int platformIndex, cl_int* pErrNum)
{
	cl_platform_id platform = 0;

	cl_uint numPlatforms = 0;
	cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
	if(ciErrNum != CL_SUCCESS)
	{
		// without a valid count there is nothing to index into
		if(pErrNum != NULL)
			*pErrNum = ciErrNum;
		return platform;
	}

	if (platformIndex>=0 && (cl_uint)platformIndex<numPlatforms)
	{
		cl_platform_id* platforms = new cl_platform_id[numPlatforms];
		ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
		if(ciErrNum == CL_SUCCESS)
		{
			platform = platforms[platformIndex];
		}
		else if(pErrNum != NULL)
		{
			*pErrNum = ciErrNum;
		}

		// released on both the success and the failure path
		delete[] platforms;
	}

	return platform;
}
|
||||
|
||||
// Fills platformInfo with the vendor, name and version strings of platform.
// Each string is queried with a buffer size of BT_MAX_STRING_LENGTH and the
// status of every query is checked with oclCHECKERROR.
void btOpenCLUtils::getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo& platformInfo)
{
	const cl_platform_info queries[3] =
	{
		CL_PLATFORM_VENDOR,
		CL_PLATFORM_NAME,
		CL_PLATFORM_VERSION
	};
	void* const targets[3] =
	{
		platformInfo.m_platformVendor,
		platformInfo.m_platformName,
		platformInfo.m_platformVersion
	};

	for (int q = 0; q < 3; ++q)
	{
		cl_int status = clGetPlatformInfo( platform,queries[q],BT_MAX_STRING_LENGTH,targets[q],NULL);
		oclCHECKERROR(status,CL_SUCCESS);
	}
}
|
||||
|
||||
// Creates an OpenCL context on the given platform.
//   platform               - platform to use; when NULL no context properties are passed
//   deviceType             - device type forwarded to clGetDeviceIDs
//   pErrNum                - optional; receives the last OpenCL status code
//   pGLContext, pGLDC      - when both are set, GL sharing properties are added
//   preferredDeviceIndex   - without a GL context: create the context on this
//                            device only; if out of range, use all devices
//   preferredPlatformIndex - not used by this function
// With a GL context the devices are tried one by one and the first one that
// yields a context wins. Returns 0 when no context could be created.
cl_context btOpenCLUtils::createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex)
{
	(void)preferredPlatformIndex;

	cl_context retContext = 0;
	cl_int ciErrNum=0;

	/*
	* If we could find our platform, use it. Otherwise pass a NULL and get whatever the
	* implementation thinks we should be using.
	*/
	cl_context_properties cps[7] = {0,0,0,0,0,0,0};
	cps[0] = CL_CONTEXT_PLATFORM;
	cps[1] = (cl_context_properties)platform;
	if (pGLContext && pGLDC)
	{
		cps[2] = CL_GL_CONTEXT_KHR;
		cps[3] = (cl_context_properties)pGLContext;
		cps[4] = CL_WGL_HDC_KHR;
		cps[5] = (cl_context_properties)pGLDC;
	}

	cl_uint num_entries = BT_MAX_CL_DEVICES;
	cl_device_id devices[BT_MAX_CL_DEVICES];

	cl_uint num_devices=0;

	ciErrNum = clGetDeviceIDs(
		platform,
		deviceType,
		num_entries,
		devices,
		&num_devices);

	if (ciErrNum != CL_SUCCESS)
	{
		// no usable device list: report the failure instead of walking an unfilled array
		if(pErrNum != NULL)
		{
			*pErrNum = ciErrNum;
		}
		return retContext;
	}

	// num_devices is the number of matching devices, which can be larger than
	// the number of entries actually written to the array
	if (num_devices > num_entries)
	{
		num_devices = num_entries;
	}

	cl_context_properties* cprops = (NULL == platform) ? NULL : cps;

	if (pGLContext)
	{
		//search for the GPU that relates to the OpenCL context
		for (cl_uint i=0;i<num_devices;i++)
		{
			retContext = clCreateContext(cprops,1,&devices[i],NULL,NULL,&ciErrNum);
			if (ciErrNum==CL_SUCCESS)
				break;
		}
	}
	else
	{
		if (preferredDeviceIndex>=0 && (cl_uint)preferredDeviceIndex<num_devices)
		{
			//create a context of the preferred device index
			retContext = clCreateContext(cprops,1,&devices[preferredDeviceIndex],NULL,NULL,&ciErrNum);
		} else
		{
			//create a context of all devices
			retContext = clCreateContext(cprops,num_devices,devices,NULL,NULL,&ciErrNum);
		}
	}
	if(pErrNum != NULL)
	{
		*pErrNum = ciErrNum;
	}

	return retContext;
}
|
||||
|
||||
// Creates an OpenCL context for the given device type, trying the available
// platforms in order until one yields a context.
//   deviceType             - device type forwarded to createContextFromPlatform
//   pErrNum                - optional; receives the status code of a failed
//                            query, or whatever createContextFromPlatform reports
//   pGLContext, pGLDC      - optional GL sharing handles, forwarded unchanged
//   preferredDeviceIndex   - forwarded to createContextFromPlatform
//   preferredPlatformIndex - when >= 0, that platform may be moved to the front
// Before trying, one platform is swapped to the front of the list: the first
// one (in list order) that either has index preferredPlatformIndex or whose
// vendor string equals spPlatformVendor.
// Prints vendor, name and version of the platform that was used.
// Returns 0 / NULL when no context could be created.
cl_context btOpenCLUtils::createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC , int preferredDeviceIndex, int preferredPlatformIndex)
{
	cl_uint numPlatforms = 0;
	cl_context retContext = 0;

	cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
	if(ciErrNum != CL_SUCCESS)
	{
		if(pErrNum != NULL) *pErrNum = ciErrNum;
		return NULL;
	}
	if(numPlatforms == 0)
	{
		return retContext;
	}

	cl_platform_id* platforms = new cl_platform_id[numPlatforms];
	ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
	if(ciErrNum != CL_SUCCESS)
	{
		if(pErrNum != NULL) *pErrNum = ciErrNum;
		delete[] platforms;	// do not leak the list on the error path
		return NULL;
	}

	cl_uint i;

	// pick the platform that is tried first
	for ( i = 0; i < numPlatforms; ++i)
	{
		char pbuf[128];
		ciErrNum = clGetPlatformInfo( platforms[i],
			CL_PLATFORM_VENDOR,
			sizeof(pbuf),
			pbuf,
			NULL);
		if(ciErrNum != CL_SUCCESS)
		{
			if(pErrNum != NULL) *pErrNum = ciErrNum;
			delete[] platforms;	// do not leak the list on the error path
			return NULL;
		}

		const bool isPreferredIndex = (preferredPlatformIndex>=0 && i==(cl_uint)preferredPlatformIndex);
		if (isPreferredIndex || !strcmp(pbuf, spPlatformVendor))
		{
			cl_platform_id tmpPlatform = platforms[0];
			platforms[0] = platforms[i];
			platforms[i] = tmpPlatform;
			break;
		}
	}

	// try the platforms in order until one of them yields a context
	for (i = 0; i < numPlatforms; ++i)
	{
		cl_platform_id platform = platforms[i];
		assert(platform);

		retContext = btOpenCLUtils::createContextFromPlatform(platform,deviceType,pErrNum,pGLContext,pGLDC,preferredDeviceIndex);

		if (retContext)
		{
			// printf("OpenCL platform details:\n");
			btOpenCLPlatformInfo platformInfo;

			btOpenCLUtils::getPlatformInfo(platform, platformInfo);

			printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
			printf(" CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
			printf(" CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion);

			break;
		}
	}

	delete[] platforms;
	return retContext;
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of the nth device from the context
|
||||
//!
|
||||
//! @return the id or -1 when out of range
|
||||
//! @param cxMainContext OpenCL context
|
||||
//! @param device_idx index of the device of interest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
cl_device_id btOpenCLUtils::getDevice(cl_context cxMainContext, int deviceIndex)
|
||||
{
|
||||
size_t szParmDataBytes;
|
||||
cl_device_id* cdDevices;
|
||||
|
||||
// get the list of devices associated with context
|
||||
clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
|
||||
|
||||
if( szParmDataBytes / sizeof(cl_device_id) < deviceIndex ) {
|
||||
return (cl_device_id)-1;
|
||||
}
|
||||
|
||||
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
|
||||
|
||||
clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
|
||||
|
||||
cl_device_id device = cdDevices[deviceIndex];
|
||||
free(cdDevices);
|
||||
|
||||
return device;
|
||||
}
|
||||
|
||||
// Returns the number of devices associated with the context, or 0 when the
// device list size cannot be queried.
int btOpenCLUtils::getNumDevices(cl_context cxMainContext)
{
	size_t szParamDataBytes = 0;
	if (clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes) != CL_SUCCESS)
	{
		return 0;
	}
	// divide first, then narrow the element count to int
	return (int)(szParamDataBytes / sizeof(cl_device_id));
}
|
||||
|
||||
// Queries the device with getDeviceInfo and prints its properties to stdout.
// Size-like values (work item/group sizes, image dimensions) are cast to
// unsigned int before being printed with %u, in the same way as the memory
// sizes, so the argument type always matches the format specifier.
void btOpenCLUtils::printDeviceInfo(cl_device_id device)
{
	btOpenCLDeviceInfo info;
	getDeviceInfo(device,info);

	printf(" CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName);
	printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor);
	printf(" CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion);

	// the device type is a bit field: print one line per bit that is set
	if( info.m_deviceType & CL_DEVICE_TYPE_CPU )
		printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
	if( info.m_deviceType & CL_DEVICE_TYPE_GPU )
		printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
	if( info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR )
		printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
	if( info.m_deviceType & CL_DEVICE_TYPE_DEFAULT )
		printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");

	printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits);
	printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", info.m_workitemDims);
	printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", (unsigned int)info.m_workItemSize[0], (unsigned int)info.m_workItemSize[1], (unsigned int)info.m_workItemSize[2]);
	printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", (unsigned int)info.m_workgroupSize);
	printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency);
	printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits);
	printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize/ (1024 * 1024)));
	printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize/ (1024 * 1024)));
	printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport== CL_TRUE ? "yes" : "no");
	printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? "local" : "global");
	printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024));
	printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024));
	if( info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE )
		printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
	if( info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE )
		printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");

	printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport);

	printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs);
	printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs);
	printf("\n CL_DEVICE_IMAGE <dim>");
	printf("\t\t\t2D_MAX_WIDTH\t %u\n", (unsigned int)info.m_image2dMaxWidth);
	printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", (unsigned int)info.m_image2dMaxHeight);
	printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", (unsigned int)info.m_image3dMaxWidth);
	printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", (unsigned int)info.m_image3dMaxHeight);
	printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", (unsigned int)info.m_image3dMaxDepth);
	if (info.m_deviceExtensions != 0)
		printf("\n CL_DEVICE_EXTENSIONS:%s\n",info.m_deviceExtensions);
	else
		printf(" CL_DEVICE_EXTENSIONS: None\n");
	printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
	printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n",
		info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong,info.m_vecWidthFloat, info.m_vecWidthDouble);
}
|
||||
|
||||
/// Fill `info` with properties of `device`, issuing one clGetDeviceInfo query per field.
/// The return codes of the individual queries are not checked.
void btOpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo& info)
{
	// Start every string empty, so that a failed query (for example an
	// extension list longer than BT_MAX_STRING_LENGTH) yields "" instead of
	// whatever bytes happened to be in the caller's struct.
	info.m_deviceName[0] = '\0';
	info.m_deviceVendor[0] = '\0';
	info.m_driverVersion[0] = '\0';
	info.m_deviceExtensions[0] = '\0';

	// CL_DEVICE_NAME
	clGetDeviceInfo(device, CL_DEVICE_NAME, BT_MAX_STRING_LENGTH, info.m_deviceName, NULL);

	// CL_DEVICE_VENDOR
	clGetDeviceInfo(device, CL_DEVICE_VENDOR, BT_MAX_STRING_LENGTH, info.m_deviceVendor, NULL);

	// CL_DRIVER_VERSION
	clGetDeviceInfo(device, CL_DRIVER_VERSION, BT_MAX_STRING_LENGTH, info.m_driverVersion, NULL);

	// CL_DEVICE_TYPE
	clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info.m_deviceType, NULL);

	// CL_DEVICE_MAX_COMPUTE_UNITS
	clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info.m_computeUnits), &info.m_computeUnits, NULL);

	// CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
	// OpenCL reports this property as a cl_uint while the destination field is
	// a size_t. Query into a cl_uint and widen, so the upper bytes of the field
	// are not left indeterminate when size_t is wider than cl_uint.
	cl_uint maxWorkItemDims = 0;
	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(maxWorkItemDims), &maxWorkItemDims, NULL);
	info.m_workitemDims = maxWorkItemDims;

	// CL_DEVICE_MAX_WORK_ITEM_SIZES
	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info.m_workItemSize), &info.m_workItemSize, NULL);

	// CL_DEVICE_MAX_WORK_GROUP_SIZE
	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info.m_workgroupSize), &info.m_workgroupSize, NULL);

	// CL_DEVICE_MAX_CLOCK_FREQUENCY
	clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info.m_clockFrequency), &info.m_clockFrequency, NULL);

	// CL_DEVICE_ADDRESS_BITS
	clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info.m_addressBits), &info.m_addressBits, NULL);

	// CL_DEVICE_MAX_MEM_ALLOC_SIZE
	clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info.m_maxMemAllocSize), &info.m_maxMemAllocSize, NULL);

	// CL_DEVICE_GLOBAL_MEM_SIZE
	clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info.m_globalMemSize), &info.m_globalMemSize, NULL);

	// CL_DEVICE_ERROR_CORRECTION_SUPPORT
	clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info.m_errorCorrectionSupport), &info.m_errorCorrectionSupport, NULL);

	// CL_DEVICE_LOCAL_MEM_TYPE
	clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info.m_localMemType), &info.m_localMemType, NULL);

	// CL_DEVICE_LOCAL_MEM_SIZE
	clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info.m_localMemSize), &info.m_localMemSize, NULL);

	// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
	clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info.m_constantBufferSize), &info.m_constantBufferSize, NULL);

	// CL_DEVICE_QUEUE_PROPERTIES
	clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info.m_queueProperties), &info.m_queueProperties, NULL);

	// CL_DEVICE_IMAGE_SUPPORT
	clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info.m_imageSupport), &info.m_imageSupport, NULL);

	// CL_DEVICE_MAX_READ_IMAGE_ARGS
	clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info.m_maxReadImageArgs), &info.m_maxReadImageArgs, NULL);

	// CL_DEVICE_MAX_WRITE_IMAGE_ARGS
	clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info.m_maxWriteImageArgs), &info.m_maxWriteImageArgs, NULL);

	// CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
	clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info.m_image2dMaxWidth, NULL);
	clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info.m_image2dMaxHeight, NULL);
	clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info.m_image3dMaxWidth, NULL);
	clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info.m_image3dMaxHeight, NULL);
	clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info.m_image3dMaxDepth, NULL);

	// CL_DEVICE_EXTENSIONS: space separated list of extension names
	clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, BT_MAX_STRING_LENGTH, info.m_deviceExtensions, NULL);

	// CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info.m_vecWidthChar, NULL);
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info.m_vecWidthShort, NULL);
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info.m_vecWidthInt, NULL);
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info.m_vecWidthLong, NULL);
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info.m_vecWidthFloat, NULL);
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info.m_vecWidthDouble, NULL);
}
|
||||
|
||||
/*
 * Return a pointer into `name` just past the last occurrence of `pattern`.
 *
 * When `pattern` does not occur in `name`, or when `pattern` is empty, `name`
 * itself is returned. The result always points inside `name`; nothing is
 * copied or modified. Used to drop the directory part of a path by stripping
 * up to the last separator.
 */
static const char* strip2(const char* name, const char* pattern)
{
	size_t const patlen = strlen(pattern);
	const char* tail = name;
	const char* hit;

	/* An empty pattern matches at every position without advancing, so the
	   scan below would never terminate; there is nothing to strip. */
	if (patlen == 0)
	{
		return name;
	}

	/* Hop from match to match; `tail` ends up just behind the last one. */
	while ((hit = strstr(tail, pattern)) != NULL)
	{
		tail = hit + patlen;
	}
	return tail;
}
|
||||
|
||||
/// Build an OpenCL program for `device` from the source text `kernelSource`.
///
/// @param clContext             context the program is created in
/// @param device                the single device the program is built for
/// @param kernelSource          null terminated OpenCL C source
/// @param pErrNum               optional; receives the OpenCL error code on failure and
///                              CL_SUCCESS on success
/// @param additionalMacros      extra build options appended to the default flags
///                              (a null pointer is treated as an empty string)
/// @param clFileNameForCaching  optional path of the source file. When given, the built
///                              binary is written to "cache/<file>.<device>.<driver>.bin".
///                              On Windows an existing cache file that is not older than
///                              the source file is loaded instead of compiling.
/// @return the built program, or 0 on failure. The caller owns the returned program.
cl_program btOpenCLUtils::compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSource, cl_int* pErrNum, const char* additionalMacros , const char* clFileNameForCaching)
{
	cl_program m_cpProgram = 0;
	cl_int status;

	if (!additionalMacros)
	{
		additionalMacros = "";
	}

	char binaryFileName[522];
	binaryFileName[0] = '\0';
	// Caching is only enabled once a cache file name has been formed successfully.
	bool useCache = false;

	if (clFileNameForCaching)
	{
		char deviceName[256] = "";
		char driverVersion[256] = "";
		clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
		clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(driverVersion), driverVersion, NULL);

		// keep only the file name: drop everything up to the last path separator
		const char* strippedName = strip2(clFileNameForCaching, "\\");
		strippedName = strip2(strippedName, "/");

		// "cache/" + name + "." + device + "." + driver + ".bin" + terminator
		const size_t requiredLength = strlen("cache/") + strlen(strippedName) + 1 + strlen(deviceName) + 1 + strlen(driverVersion) + strlen(".bin") + 1;
		if (requiredLength <= sizeof(binaryFileName))
		{
			sprintf(binaryFileName, "cache/%s.%s.%s.bin", strippedName, deviceName, driverVersion);
			useCache = true;
		}
		else
		{
			printf("\nCache file name too long, binary caching disabled for %s\n", strippedName);
		}
	}

#ifdef _WIN32
	if (useCache)
	{
		bool fileUpToDate = false;
		bool binaryFileValid = false;
		FILETIME modtimeBinary;

		CreateDirectory("cache", 0);

		// step 1: does a cached binary exist, and when was it written?
		HANDLE binaryFileHandle = CreateFile(binaryFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
		if (binaryFileHandle == INVALID_HANDLE_VALUE)
		{
			DWORD errorCode = GetLastError();
			switch (errorCode)
			{
			case ERROR_FILE_NOT_FOUND:
				printf("\nCached file not found %s\n", binaryFileName);
				break;
			case ERROR_PATH_NOT_FOUND:
				printf("\nCached file path not found %s\n", binaryFileName);
				break;
			default:
				printf("\nFailed reading cached file with errorCode = %lu\n", (unsigned long)errorCode);
				break;
			}
		}
		else
		{
			if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary) == 0)
			{
				DWORD errorCode = GetLastError();
				printf("\nGetFileTime errorCode = %lu\n", (unsigned long)errorCode);
			}
			else
			{
				binaryFileValid = true;
			}
			CloseHandle(binaryFileHandle);
		}

		// step 2: the cached binary is usable when the source is not newer than it
		if (binaryFileValid)
		{
			HANDLE srcFileHandle = CreateFile(clFileNameForCaching, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
			if (srcFileHandle != INVALID_HANDLE_VALUE)
			{
				FILETIME modtimeSrc;
				if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc) == 0)
				{
					// Without a source timestamp nothing can be compared:
					// leave fileUpToDate false so the program is rebuilt.
					DWORD errorCode = GetLastError();
					printf("\nGetFileTime errorCode = %lu\n", (unsigned long)errorCode);
				}
				else if ((modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime)
					|| ((modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime) && (modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime)))
				{
					fileUpToDate = true;
				}
				else
				{
					printf("\nCached binary file out-of-date (%s)\n", binaryFileName);
				}
				CloseHandle(srcFileHandle);
			}
			else
			{
#ifdef _DEBUG
				DWORD errorCode = GetLastError();
				switch (errorCode)
				{
				case ERROR_FILE_NOT_FOUND:
					printf("\nSrc file not found %s\n", clFileNameForCaching);
					break;
				case ERROR_PATH_NOT_FOUND:
					printf("\nSrc path not found %s\n", clFileNameForCaching);
					break;
				default:
					printf("\nnSrc file reading errorCode = %lu\n", (unsigned long)errorCode);
					break;
				}

				//we should make sure the src file exists so we can verify the timestamp with binary
				assert(0);
#else
				//if we cannot find the source, assume it is OK in release builds
				fileUpToDate = true;
#endif
			}
		}

		// step 3: load and build the cached binary
		if (fileUpToDate)
		{
			FILE* file = fopen(binaryFileName, "rb");
			if (file)
			{
				fseek(file, 0L, SEEK_END);
				const long fileSize = ftell(file);
				rewind(file);

				if (fileSize > 0)
				{
					size_t binarySize = (size_t)fileSize;
					char* binary = new char[binarySize];

					// a short read means the cache file is unusable; fall back to the source
					if (fread(binary, sizeof(char), binarySize, file) == binarySize)
					{
						m_cpProgram = clCreateProgramWithBinary(clContext, 1, &device, &binarySize, (const unsigned char**)&binary, 0, &status);
						if (status == CL_SUCCESS)
						{
							status = clBuildProgram(m_cpProgram, 1, &device, additionalMacros, 0, 0);
						}

						if (status != CL_SUCCESS)
						{
							if (m_cpProgram)
							{
								size_t ret_val_size = 0;
								clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
								char* build_log = new char[ret_val_size + 1];
								clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
								build_log[ret_val_size] = '\0';
								printf("%s\n", build_log);
								delete[] build_log;
								clReleaseProgram(m_cpProgram);
							}
							btAssert(0);
							// a null program makes the code below compile from source
							m_cpProgram = 0;
						}
					}
					delete[] binary;
				}
				fclose(file);
			}
		}
	}
#endif //_WIN32

	if (!m_cpProgram)
	{
		cl_int localErrNum;
		size_t program_length = strlen(kernelSource);

		m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum);
		if (localErrNum != CL_SUCCESS)
		{
			if (pErrNum)
				*pErrNum = localErrNum;
			return 0;
		}

		// Build the program with 'mad' Optimization option
#ifdef MAC
		const char* flags = "-cl-mad-enable -DMAC -DGUID_ARG";
#else
		//const char* flags = "-DGUID_ARG= -fno-alias";
		const char* flags = "-DGUID_ARG= ";
#endif

		char* compileFlags = new char[strlen(additionalMacros) + strlen(flags) + 5];
		sprintf(compileFlags, "%s %s", flags, additionalMacros);
		// No callback is passed, so the build has finished when the call returns
		// and the option string can be released right away on every path.
		localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL);
		delete[] compileFlags;

		if (localErrNum != CL_SUCCESS)
		{
			size_t ret_val_size = 0;
			clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
			char* build_log = new char[ret_val_size + 1];
			clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);

			// to be careful, terminate with \0
			// there's no information in the reference whether the string is 0 terminated or not
			build_log[ret_val_size] = '\0';

			printf("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log);
			delete[] build_log;

			clReleaseProgram(m_cpProgram);
			if (pErrNum)
				*pErrNum = localErrNum;
			return 0;
		}

		if (useCache)
		{
			// write the freshly built binary to the cache file
			cl_uint numAssociatedDevices = 0;
			status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0);
			btAssert(status == CL_SUCCESS);
			if (status == CL_SUCCESS && numAssociatedDevices == 1)
			{
				size_t binarySize = 0;
				status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0);
				btAssert(status == CL_SUCCESS);

				// a size of zero means no binary is available for the device
				if (status == CL_SUCCESS && binarySize > 0)
				{
					char* binary = new char[binarySize];

					status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0);
					btAssert(status == CL_SUCCESS);

					if (status == CL_SUCCESS)
					{
						FILE* file = fopen(binaryFileName, "wb");
						if (file)
						{
							fwrite(binary, sizeof(char), binarySize, file);
							fclose(file);
						}
						else
						{
							printf("cannot write file %s\n", binaryFileName);
						}
					}

					delete[] binary;
				}
			}
		}
	}

	if (pErrNum)
		*pErrNum = CL_SUCCESS;
	return m_cpProgram;
}
|
||||
|
||||
|
||||
/// Create the kernel `kernelName`, compiling `kernelSource` first when no program is supplied.
///
/// @param clContext         context used when a program has to be compiled
/// @param device            device used when a program has to be compiled
/// @param kernelSource      OpenCL C source; only read when `prog` is 0
/// @param kernelName        name of the __kernel function to look up
/// @param pErrNum           optional; receives the OpenCL error code, CL_SUCCESS on success
/// @param prog              optional prebuilt program. It stays owned by the caller.
///                          When 0, a temporary program is compiled and released again
///                          before returning.
/// @param additionalMacros  extra build options used when a program has to be compiled
/// @return the kernel, or 0 on failure
cl_kernel btOpenCLUtils::compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros )
{
	printf("compiling kernel %s ", kernelName);

	cl_program m_cpProgram = prog;
	if (!m_cpProgram)
	{
		m_cpProgram = compileCLProgramFromString(clContext, device, kernelSource, pErrNum, additionalMacros);
		if (!m_cpProgram)
		{
			// The build failed; compileCLProgramFromString has stored the error
			// code in pErrNum. Looking up a kernel in a null program
			// would only replace that code with a less useful one.
			return 0;
		}
	}

	// Create the kernel
	cl_int localErrNum;
	cl_kernel kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum);

	// A program compiled here is temporary: release it on success and on
	// failure alike. A kernel that was created keeps its own reference.
	if (!prog)
	{
		clReleaseProgram(m_cpProgram);
	}

	if (localErrNum != CL_SUCCESS)
	{
		printf("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName);
		if (pErrNum)
			*pErrNum = localErrNum;
		return 0;
	}

	printf("ready. \n");

	if (pErrNum)
		*pErrNum = CL_SUCCESS;
	return kernel;
}
|
||||
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//original author: Roman Ponomarev
|
||||
//cleanup by Erwin Coumans
|
||||
|
||||
#ifndef BT_OPENCL_UTILS_H
|
||||
#define BT_OPENCL_UTILS_H
|
||||
|
||||
#include "btOpenCLInclude.h"
|
||||
|
||||
|
||||
#define BT_MAX_STRING_LENGTH 1024
|
||||
|
||||
struct btOpenCLDeviceInfo
|
||||
{
|
||||
char m_deviceName[BT_MAX_STRING_LENGTH];
|
||||
char m_deviceVendor[BT_MAX_STRING_LENGTH];
|
||||
char m_driverVersion[BT_MAX_STRING_LENGTH];
|
||||
char m_deviceExtensions[BT_MAX_STRING_LENGTH];
|
||||
|
||||
cl_device_type m_deviceType;
|
||||
cl_uint m_computeUnits;
|
||||
size_t m_workitemDims;
|
||||
size_t m_workItemSize[3];
|
||||
size_t m_image2dMaxWidth;
|
||||
size_t m_image2dMaxHeight;
|
||||
size_t m_image3dMaxWidth;
|
||||
size_t m_image3dMaxHeight;
|
||||
size_t m_image3dMaxDepth;
|
||||
size_t m_workgroupSize;
|
||||
cl_uint m_clockFrequency;
|
||||
cl_ulong m_constantBufferSize;
|
||||
cl_ulong m_localMemSize;
|
||||
cl_ulong m_globalMemSize;
|
||||
cl_bool m_errorCorrectionSupport;
|
||||
cl_device_local_mem_type m_localMemType;
|
||||
cl_uint m_maxReadImageArgs;
|
||||
cl_uint m_maxWriteImageArgs;
|
||||
|
||||
|
||||
|
||||
cl_uint m_addressBits;
|
||||
cl_ulong m_maxMemAllocSize;
|
||||
cl_command_queue_properties m_queueProperties;
|
||||
cl_bool m_imageSupport;
|
||||
cl_uint m_vecWidthChar;
|
||||
cl_uint m_vecWidthShort;
|
||||
cl_uint m_vecWidthInt;
|
||||
cl_uint m_vecWidthLong;
|
||||
cl_uint m_vecWidthFloat;
|
||||
cl_uint m_vecWidthDouble;
|
||||
|
||||
};
|
||||
|
||||
struct btOpenCLPlatformInfo
|
||||
{
|
||||
char m_platformVendor[BT_MAX_STRING_LENGTH];
|
||||
char m_platformName[BT_MAX_STRING_LENGTH];
|
||||
char m_platformVersion[BT_MAX_STRING_LENGTH];
|
||||
};
|
||||
|
||||
class btOpenCLUtils
|
||||
{
|
||||
public:
|
||||
|
||||
/// CL Context optionally takes a GL context. This is a generic type because we don't really want this code
|
||||
/// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise.
|
||||
static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0, int preferredDeviceIndex = -1, int preferredPlatformIndex= - 1);
|
||||
|
||||
static int getNumDevices(cl_context cxMainContext);
|
||||
static cl_device_id getDevice(cl_context cxMainContext, int nr);
|
||||
static void getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo& info);
|
||||
static void printDeviceInfo(cl_device_id device);
|
||||
|
||||
static cl_kernel compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum=0, cl_program prog=0,const char* additionalMacros = "" );
|
||||
|
||||
//optional
|
||||
static cl_program compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum=0,const char* additionalMacros = "" , const char* srcFileNameForCaching=0);
|
||||
|
||||
//the following optional APIs provide access using specific platform information
|
||||
static int getNumPlatforms(cl_int* pErrNum=0);
|
||||
///get the nr'th platform, where nr is in the range [0..getNumPlatforms)
|
||||
static cl_platform_id getPlatform(int nr, cl_int* pErrNum=0);
|
||||
static void getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo& platformInfo);
|
||||
static const char* getSdkVendorName();
|
||||
static cl_context createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0,int preferredDeviceIndex = -1, int preferredPlatformIndex= -1);
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif // BT_OPENCL_UTILS_H
|
||||
92
Extras/RigidBodyGpuPipeline/opencl/basic_initialize/main.cpp
Normal file
92
Extras/RigidBodyGpuPipeline/opencl/basic_initialize/main.cpp
Normal file
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
///original author: Erwin Coumans
|
||||
|
||||
#include "btOpenCLUtils.h"
|
||||
#include <stdio.h>
|
||||
|
||||
cl_context g_cxMainContext;
|
||||
cl_command_queue g_cqCommandQue;
|
||||
|
||||
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int ciErrNum = 0;
|
||||
|
||||
cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
|
||||
const char* vendorSDK = btOpenCLUtils::getSdkVendorName();
|
||||
|
||||
printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK);
|
||||
int numPlatforms = btOpenCLUtils::getNumPlatforms();
|
||||
printf("Num Platforms = %d\n", numPlatforms);
|
||||
|
||||
for (int i=0;i<numPlatforms;i++)
|
||||
{
|
||||
cl_platform_id platform = btOpenCLUtils::getPlatform(i);
|
||||
btOpenCLPlatformInfo platformInfo;
|
||||
btOpenCLUtils::getPlatformInfo(platform,platformInfo);
|
||||
printf("--------------------------------\n");
|
||||
printf("Platform info for platform nr %d:\n",i);
|
||||
printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
|
||||
printf(" CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
|
||||
printf(" CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion);
|
||||
|
||||
cl_context context = btOpenCLUtils::createContextFromPlatform(platform,deviceType,&ciErrNum);
|
||||
|
||||
int numDevices = btOpenCLUtils::getNumDevices(context);
|
||||
printf("Num Devices = %d\n", numDevices);
|
||||
for (int j=0;j<numDevices;j++)
|
||||
{
|
||||
cl_device_id dev = btOpenCLUtils::getDevice(context,j);
|
||||
btOpenCLDeviceInfo devInfo;
|
||||
btOpenCLUtils::getDeviceInfo(dev,devInfo);
|
||||
btOpenCLUtils::printDeviceInfo(dev);
|
||||
}
|
||||
|
||||
clReleaseContext(context);
|
||||
}
|
||||
|
||||
///Easier method to initialize OpenCL using createContextFromType for a GPU
|
||||
deviceType = CL_DEVICE_TYPE_GPU;
|
||||
|
||||
void* glCtx=0;
|
||||
void* glDC = 0;
|
||||
printf("Initialize OpenCL using btOpenCLUtils::createContextFromType for CL_DEVICE_TYPE_GPU\n");
|
||||
g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);
|
||||
|
||||
for (int i=0;i<numDev;i++)
|
||||
{
|
||||
cl_device_id device;
|
||||
device = btOpenCLUtils::getDevice(g_cxMainContext,i);
|
||||
btOpenCLDeviceInfo clInfo;
|
||||
btOpenCLUtils::getDeviceInfo(device,clInfo);
|
||||
btOpenCLUtils::printDeviceInfo(device);
|
||||
// create a command-queue
|
||||
g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, device, 0, &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
//normally you would create and execute kernels using this command queue
|
||||
|
||||
clReleaseCommandQueue(g_cqCommandQue);
|
||||
}
|
||||
|
||||
clReleaseContext(g_cxMainContext);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
|
||||
-- pull in the premake script of each vendor specific sub directory
include("AMD")
include("Intel")
include("NVIDIA")
|
||||
@@ -0,0 +1,49 @@
|
||||
|
||||
-- Premake project for the broadphase benchmark, AMD OpenCL flavour.
-- The project is only emitted when findOpenCL_AMD() returns a true value.
hasCL = findOpenCL_AMD()

if hasCL then

	project("OpenCL_broadphase_benchmark_AMD")

	initOpenCL_AMD()

	language("C++")

	kind("ConsoleApp")
	targetdir("../../../bin")

	initOpenGL()
	initGlut()
	initGlew()

	includedirs({
		"../../../rendering/BulletMath",
		"../../primitives",
		"../../../../../src"
	})

	files({
		-- benchmark sources
		"../main.cpp",
		"../findPairsOpenCL.cpp",
		"../findPairsOpenCL.h",
		"../btGridBroadphaseCL.cpp",
		"../btGridBroadphaseCL.h",
		-- shared 3d grid broadphase
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		-- Bullet sources compiled directly into the benchmark
		"../../../../../src/LinearMath/btAlignedAllocator.cpp",
		"../../../../../src/LinearMath/btQuickprof.cpp",
		"../../../../../src/LinearMath/btQuickprof.h",
		"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		-- OpenCL helpers
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	})

end
|
||||
@@ -0,0 +1,49 @@
|
||||
|
||||
-- Premake project for the broadphase benchmark, Intel OpenCL flavour.
-- The project is only emitted when findOpenCL_Intel() returns a true value.
hasCL = findOpenCL_Intel()

if hasCL then

	project("OpenCL_broadphase_benchmark_Intel")

	initOpenCL_Intel()

	language("C++")

	kind("ConsoleApp")
	targetdir("../../../bin")

	initOpenGL()
	initGlut()
	initGlew()

	includedirs({
		"../../../rendering/BulletMath",
		"../../primitives",
		"../../../../../src"
	})

	files({
		-- benchmark sources
		"../main.cpp",
		"../findPairsOpenCL.cpp",
		"../findPairsOpenCL.h",
		"../btGridBroadphaseCL.cpp",
		"../btGridBroadphaseCL.h",
		-- shared 3d grid broadphase
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		-- Bullet sources compiled directly into the benchmark
		"../../../../../src/LinearMath/btAlignedAllocator.cpp",
		"../../../../../src/LinearMath/btQuickprof.cpp",
		"../../../../../src/LinearMath/btQuickprof.h",
		"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		-- OpenCL helpers
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	})

end
|
||||
@@ -0,0 +1,49 @@
|
||||
|
||||
-- Premake project for the broadphase benchmark, NVIDIA OpenCL flavour.
-- The project is only emitted when findOpenCL_NVIDIA() returns a true value.
hasCL = findOpenCL_NVIDIA()

if hasCL then

	project("OpenCL_broadphase_benchmark_NVIDIA")

	initOpenCL_NVIDIA()

	language("C++")

	kind("ConsoleApp")
	targetdir("../../../bin")

	initOpenGL()
	initGlut()
	initGlew()

	includedirs({
		"../../../rendering/BulletMath",
		"../../primitives",
		"../../../../../src"
	})

	files({
		-- benchmark sources
		"../main.cpp",
		"../findPairsOpenCL.cpp",
		"../findPairsOpenCL.h",
		"../btGridBroadphaseCL.cpp",
		"../btGridBroadphaseCL.h",
		-- shared 3d grid broadphase
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		-- Bullet sources compiled directly into the benchmark
		"../../../../../src/LinearMath/btAlignedAllocator.cpp",
		"../../../../../src/LinearMath/btQuickprof.cpp",
		"../../../../../src/LinearMath/btQuickprof.h",
		"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		-- OpenCL helpers
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	})

end
|
||||
@@ -0,0 +1,335 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
// 3x3 matrix stored as three float4 rows. The w lane of each row is padding.
typedef struct
{
	float4 m_row[3];
} Matrix3x3;

typedef unsigned int u32;

// Per body state as read and written by the kernels in this file.
// NOTE(review): the layout presumably has to match a host side struct -- confirm before changing.
typedef struct
{
	float4 m_pos;    // position
	float4 m_quat;   // orientation quaternion
	float4 m_linVel; // linear velocity
	float4 m_angVel; // angular velocity

	u32 m_shapeIdx;  // index into the per shape arrays
	u32 m_shapeType;
	float m_invMass; // zero marks a body the kernels treat as non moving
	float m_restituitionCoeff;
	float m_frictionCoeff;
} Body;

// Inverse inertia tensors of one body.
typedef struct
{
	Matrix3x3 m_invInertia;     // current tensor, rewritten by setupBodiesKernel
	Matrix3x3 m_initInvInertia; // initial tensor, only read by setupBodiesKernel
} Shape;
|
||||
|
||||
|
||||
// Build a 3x3 matrix from the quaternion quat with x, y, z as vector part and w as scalar part.
// Every element is passed through fabs, so the result is the element wise
// ABSOLUTE value of the rotation matrix, not the rotation matrix itself.
// The w lane of each row is set to zero.
__inline
Matrix3x3 qtGetRotationMatrix(float4 quat)
{
	// squares of the vector part
	float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);

	// doubled pair products shared between mirrored elements
	float xy = 2*quat.x*quat.y;
	float xz = 2*quat.x*quat.z;
	float yz = 2*quat.y*quat.z;
	float wx = 2*quat.w*quat.x;
	float wy = 2*quat.w*quat.y;
	float wz = 2*quat.w*quat.z;

	Matrix3x3 out;
	out.m_row[0] = (float4)(fabs(1-2*quat2.y-2*quat2.z), fabs(xy-wz), fabs(xz+wy), 0.f);
	out.m_row[1] = (float4)(fabs(xy+wz), fabs(1-2*quat2.x-2*quat2.z), fabs(yz-wx), 0.f);
	out.m_row[2] = (float4)(fabs(xz-wy), fabs(yz+wx), fabs(1-2*quat2.x-2*quat2.y), 0.f);
	return out;
}
|
||||
|
||||
|
||||
// One corner of an axis aligned bounding box plus an integer tag.
// The kernels in this file store a box as two consecutive entries,
// minimum corner first and maximum corner second, and write the node index into uw.
typedef struct
{
	float fx;
	float fy;
	float fz;
	unsigned int uw;
} btAABBCL;
|
||||
|
||||
// Return the transpose of m. The w lane of each result row is zero.
__inline
Matrix3x3 mtTranspose(Matrix3x3 m)
{
	Matrix3x3 t;

	// row i of the result is column i of m
	t.m_row[0].x = m.m_row[0].x;
	t.m_row[0].y = m.m_row[1].x;
	t.m_row[0].z = m.m_row[2].x;
	t.m_row[0].w = 0.f;

	t.m_row[1].x = m.m_row[0].y;
	t.m_row[1].y = m.m_row[1].y;
	t.m_row[1].z = m.m_row[2].y;
	t.m_row[1].w = 0.f;

	t.m_row[2].x = m.m_row[0].z;
	t.m_row[2].y = m.m_row[1].z;
	t.m_row[2].z = m.m_row[2].z;
	t.m_row[2].w = 0.f;

	return t;
}
|
||||
|
||||
// Dot product of the xyz lanes of a and b. The w lanes are ignored.
__inline
float dot3F4(float4 a, float4 b)
{
	// zero the w lanes so the four component dot only sums x, y and z
	return dot((float4)(a.xyz,0.f), (float4)(b.xyz,0.f));
}
|
||||
|
||||
|
||||
// Matrix product a * b of two 3x3 matrices. The w lane of each result row is zero.
__inline
Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)
{
	// rows of the transpose are the columns of b
	Matrix3x3 bT = mtTranspose( b );

	// the original author zeroed the w lanes up front because doing so inside
	// the row loop did not work for them; kept here, outside of any loop
	a.m_row[0].w = 0.f;
	a.m_row[1].w = 0.f;
	a.m_row[2].w = 0.f;

	// element (i, j) is row i of a dotted with column j of b
	Matrix3x3 ans;
	ans.m_row[0] = (float4)(dot3F4(a.m_row[0],bT.m_row[0]), dot3F4(a.m_row[0],bT.m_row[1]), dot3F4(a.m_row[0],bT.m_row[2]), 0.f);
	ans.m_row[1] = (float4)(dot3F4(a.m_row[1],bT.m_row[0]), dot3F4(a.m_row[1],bT.m_row[1]), dot3F4(a.m_row[1],bT.m_row[2]), 0.f);
	ans.m_row[2] = (float4)(dot3F4(a.m_row[2],bT.m_row[0]), dot3F4(a.m_row[2],bT.m_row[1]), dot3F4(a.m_row[2],bT.m_row[2]), 0.f);
	return ans;
}
|
||||
|
||||
|
||||
//apply gravity
|
||||
//update world inverse inertia tensor
|
||||
//copy velocity from arrays to bodies
|
||||
//copy transforms from buffer to bodies
|
||||
|
||||
// One work item per body with index nodeID below numNodes.
//
// Bodies with a non zero inverse mass:
//   - gravity of -9.8 along y is integrated into linVel over a fixed step of 0.0166666
//   - position and orientation are copied from g_vertexBuffer into gBodies
//   - linear and angular velocity are copied from linVel and pAngVel into gBodies
//   - bodyInertias[nodeID].m_invInertia is recomputed from m_initInvInertia
// Bodies with a zero inverse mass only get their velocities in gBodies zeroed.
//
// g_vertexBuffer is indexed in float4 units: positions start at startOffset/4 and
// orientations follow numNodes entries later.
// NOTE(review): startOffset is presumably a count of floats -- confirm against the host code.
__kernel void
setupBodiesKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
		__global float4 *linVel,
		__global float4 *pAngVel,
		__global Body* gBodies, __global Shape* bodyInertias
)
{
	int nodeID = get_global_id(0);

	float timeStep = 0.0166666f;

	if( nodeID < numNodes )
	{
		float inverseMass = gBodies[nodeID].m_invMass;
		if (inverseMass != 0.f)
		{
			float4 position = g_vertexBuffer[nodeID + startOffset/4];
			float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];

			float4 gravityAcceleration = (float4)(0.f,-9.8f,0.f,0.f);
			linVel[nodeID] += gravityAcceleration * timeStep;

			gBodies[nodeID].m_pos = position;
			gBodies[nodeID].m_quat = orientation;

			gBodies[nodeID].m_linVel = (float4)(linVel[nodeID].xyz,0.f);
			gBodies[nodeID].m_angVel = (float4)(pAngVel[nodeID].xyz,0.f);

			// NOTE(review): qtGetRotationMatrix in this file applies fabs to every
			// element, so m holds absolute values. That suits the bounding box kernels,
			// but the tensor update below looks like it needs the signed rotation -- confirm.
			Matrix3x3 m = qtGetRotationMatrix( orientation);
			Matrix3x3 mT = mtTranspose( m );

			// world tensor = m * initial tensor * transpose of m
			Matrix3x3 tmp = mtMul(m, bodyInertias[nodeID].m_initInvInertia);
			Matrix3x3 tmp2 = mtMul(tmp, mT);
			bodyInertias[nodeID].m_invInertia = tmp2;
		} else
		{
			gBodies[nodeID].m_linVel = (float4)(0.f,0.f,0.f,0.f);
			gBodies[nodeID].m_angVel = (float4)(0.f,0.f,0.f,0.f);
		}
	}
}
|
||||
|
||||
|
||||
// One work item per body: for bodies with a non zero inverse mass, copy the
// linear and angular velocity from gBodies back into the linVel and pAngVel
// arrays with the w lane cleared. The remaining parameters are not used.
__kernel void
copyVelocitiesKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
		__global float4 *linVel,
		__global float4 *pAngVel,
		__global Body* gBodies, __global Shape* bodyInertias
)
{
	int nodeID = get_global_id(0);

	// work items past the last body have nothing to do
	if( nodeID >= numNodes )
	{
		return;
	}

	// bodies with zero inverse mass keep their array entries untouched
	if (gBodies[nodeID].m_invMass == 0.f)
	{
		return;
	}

	linVel[nodeID] = (float4)(gBodies[nodeID].m_linVel.xyz,0.f);
	pAngVel[nodeID] = (float4)(gBodies[nodeID].m_angVel.xyz,0.f);
}
|
||||
|
||||
|
||||
|
||||
// One work item per node: compute the world space bounding box of a box with
// fixed half extents of 1.01 on every axis, placed at the position and
// orientation stored in g_vertexBuffer, and write it to pAABB.
//
// g_vertexBuffer is indexed in float4 units: positions start at startOffset/4,
// orientations follow numNodes entries later, colors another numNodes later.
// The color entry of every processed node is overwritten with a fixed green.
// pAABB receives two entries per node: minimum corner at nodeID*2 and maximum
// corner at nodeID*2+1, both tagged with nodeID in uw.
__kernel void
initializeGpuAabbsSimple( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global btAABBCL* pAABB)
{
	int nodeID = get_global_id(0);

	if( nodeID < numNodes )
	{
		float4 position = g_vertexBuffer[nodeID + startOffset/4];
		float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];

		// the previous color was loaded into a local that was never read; only the store remains
		float4 green = (float4)(.4f,1.f,.4f,1.f);
		g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = green;

		float4 halfExtents = (float4)(1.01f,1.01f,1.01f,0.f);

		// qtGetRotationMatrix returns absolute values, which is what the extent projection needs
		Matrix3x3 abs_b = qtGetRotationMatrix(orientation);

		float4 extent = (float4) (
			dot(abs_b.m_row[0],halfExtents),
			dot(abs_b.m_row[1],halfExtents),
			dot(abs_b.m_row[2],halfExtents),
			0.f);

		pAABB[nodeID*2].fx = position.x-extent.x;
		pAABB[nodeID*2].fy = position.y-extent.y;
		pAABB[nodeID*2].fz = position.z-extent.z;
		pAABB[nodeID*2].uw = nodeID;

		pAABB[nodeID*2+1].fx = position.x+extent.x;
		pAABB[nodeID*2+1].fy = position.y+extent.y;
		pAABB[nodeID*2+1].fz = position.z+extent.z;
		pAABB[nodeID*2+1].uw = nodeID;
	}
}
|
||||
|
||||
|
||||
|
||||
// Computes a world-space AABB for every node from the local-space AABB of the
// shape referenced by its body, and resets the node colour to green.
//
// g_vertexBuffer is read as three consecutive float4 arrays of numNodes
// entries starting at float4 index startOffset/4: positions, orientations
// (quaternions) and colours.
// plocalShapeAABB holds two entries per shape (min corner then max corner).
// pAABB receives two entries per node (min corner then max corner); the uw
// field of both is set to the node index. Nodes whose body has a negative
// m_shapeIdx get no AABB written.
//
// The world box is centre +/- extent, where
//   centre = position + R * localCentre
//   extent = abs(R) * localHalfExtents
// The absolute value keeps the extent non-negative for any rotation, and the
// rotated local centre keeps the box correct for shapes whose local AABB is
// not centred on the origin. For origin-centred shapes localCentre is zero,
// so the centre term reduces to the body position.
__kernel void
initializeGpuAabbsFull( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global Body* gBodies, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)
{
	int nodeID = get_global_id(0);

	if( nodeID < numNodes )
	{
		float4 position = g_vertexBuffer[nodeID + startOffset/4];
		float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];

		// Reset the colour slot of this node.
		float4 green = (float4)(.4f,1.f,.4f,1.f);
		g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = green;

		int shapeIndex = gBodies[nodeID].m_shapeIdx;
		if (shapeIndex>=0)
		{
			btAABBCL minAabb = plocalShapeAABB[shapeIndex*2];
			btAABBCL maxAabb = plocalShapeAABB[shapeIndex*2+1];

			float4 localMin = (float4)(minAabb.fx,minAabb.fy,minAabb.fz,0.f);
			float4 localMax = (float4)(maxAabb.fx,maxAabb.fy,maxAabb.fz,0.f);
			float4 halfExtents = (localMax - localMin)*0.5f;
			float4 localCenter = (localMax + localMin)*0.5f;

			Matrix3x3 rot = qtGetRotationMatrix(orientation);

			// World-space centre of the rotated local box.
			float4 center = (float4) (
				position.x + dot(rot.m_row[0],localCenter),
				position.y + dot(rot.m_row[1],localCenter),
				position.z + dot(rot.m_row[2],localCenter),
				0.f);

			// Component-wise absolute value of the rotation matrix.
			Matrix3x3 abs_b = rot;
			abs_b.m_row[0] = fabs(abs_b.m_row[0]);
			abs_b.m_row[1] = fabs(abs_b.m_row[1]);
			abs_b.m_row[2] = fabs(abs_b.m_row[2]);

			float4 extent = (float4) (
				dot(abs_b.m_row[0],halfExtents),
				dot(abs_b.m_row[1],halfExtents),
				dot(abs_b.m_row[2],halfExtents),
				0.f);

			pAABB[nodeID*2].fx = center.x-extent.x;
			pAABB[nodeID*2].fy = center.y-extent.y;
			pAABB[nodeID*2].fz = center.z-extent.z;
			pAABB[nodeID*2].uw = nodeID;

			pAABB[nodeID*2+1].fx = center.x+extent.x;
			pAABB[nodeID*2+1].fy = center.y+extent.y;
			pAABB[nodeID*2+1].fz = center.z+extent.z;
			pAABB[nodeID*2+1].uw = nodeID;
		}
	}
}
|
||||
|
||||
|
||||
// Paints both members of every overlapping pair light red, one work-item per
// pair. pOverlappingPairs holds numOverlap pairs of node indices. The colour
// is written to the third per-node float4 array of g_vertexBuffer, i.e. at
// float4 index startOffset/4 + 2*numNodes + nodeIndex.
// Several work-items may write the same slot; they all write the same value.
__kernel void
broadphaseColorKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global int2* pOverlappingPairs, const int numOverlap)
{
	const int pairIndex = get_global_id(0);

	// Work-items beyond the last pair have nothing to do.
	if (pairIndex >= numOverlap)
		return;

	const int colorBase = startOffset/4 + numNodes + numNodes;
	const float4 highlight = (float4)(1.f,0.4f,0.4f,1.f);
	const int2 overlap = pOverlappingPairs[pairIndex];

	g_vertexBuffer[overlap.x + colorBase] = highlight;
	g_vertexBuffer[overlap.y + colorBase] = highlight;
}
|
||||
|
||||
|
||||
|
||||
// Brute-force proximity colouring: every work-item compares its own node
// position against the position of every other node (O(numNodes) per item).
//
// Resulting colour of the node, written to the third per-node float4 array of
// g_vertexBuffer:
//   blue             if another node has exactly the same x, y and z
//   red * overlap    otherwise, where overlap grows by 0.25 for every other
//                    node whose squared distance is below 7
//   green            if no other node is that close
__kernel void
broadphaseKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer)
{
	const int self = get_global_id(0);

	// Work-items beyond the last node have nothing to do.
	if (self >= numNodes)
		return;

	const int positionBase = startOffset/4;
	const int colorSlot = self + startOffset/4+numNodes+numNodes;

	const float4 red = (float4)(1.f,0.f,0.f,0.f);
	const float4 green = (float4)(0.f,1.f,0.f,0.f);
	const float4 blue = (float4)(0.f,0.f,1.f,0.f);

	const float4 myPos = g_vertexBuffer[self + positionBase];

	// Default colour, replaced by the final result below.
	g_vertexBuffer[colorSlot] = green;

	float overlap = 0;
	int coincident = 0;

	for (int other = 0; other < numNodes; other++)
	{
		if (other == self)
			continue;

		const float4 otherPos = g_vertexBuffer[other + positionBase];

		if ((otherPos.x == myPos.x) &&
			(otherPos.y == myPos.y) &&
			(otherPos.z == myPos.z))
		{
			coincident = 1;
		}

		const float dx = otherPos.x - myPos.x;
		const float dy = otherPos.y - myPos.y;
		const float dz = otherPos.z - myPos.z;
		const float distsqr = (dx*dx) + (dy*dy) + (dz*dz);

		if (distsqr < 7.f)
		{
			overlap += 0.25f;
		}
	}

	float4 result = green;
	if (coincident)
	{
		result = blue;
	}
	else if (overlap > 0.f)
	{
		result = red*overlap;
	}
	g_vertexBuffer[colorSlot] = result;
}
|
||||
|
||||
);
|
||||
@@ -0,0 +1,231 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Roman Ponomarev, Erwin Coumans
|
||||
|
||||
#ifdef RELEASE_ME
|
||||
#define COMPUTE_AABB_KERNEL_PATH "computeAabbKernelOCL.cl"
|
||||
#else
|
||||
#define COMPUTE_AABB_KERNEL_PATH "..\\..\\opencl\\broadphase_benchmark\\computeAabbKernelOCL"
|
||||
#endif
|
||||
|
||||
|
||||
#include "btGridBroadphaseCl.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "Adl/Adl.h"
|
||||
#include "AdlPrimitives/Math/Math.h"
|
||||
|
||||
#include "Adl/AdlKernel.h"
|
||||
#include "../basic_initialize/btOpenCLUtils.h"
|
||||
#define MSTRINGIFY(A) #A
|
||||
static const char* spComputeAabbSource=
|
||||
#include "computeAabbKernelOCL.cl"
|
||||
|
||||
// Host-side AABB record: min corner followed by an index, then max corner
// followed by an index. The field order matches the btAabbCL struct declared
// in computeAabbKernelOCL.cl, so the member order must not be changed.
struct btTmpAabb
{
	// min corner
	float			minfx;
	float			minfy;
	float			minfz;
	unsigned int	index0;

	// max corner
	float			maxfx;
	float			maxfy;
	float			maxfz;
	unsigned int	index1;
};
|
||||
|
||||
|
||||
|
||||
|
||||
// Builds the OpenCL grid broadphase on top of bt3dGridBroadphaseOCL.
//
// All arguments are forwarded unchanged to the base class. In addition this
// constructor
//  - fetches the computeAabb, countOverlappingpairs and squeezePairCaches
//    kernels from the Adl device,
//  - allocates the one-element constant buffer m_aabbConstBuffer,
//  - creates the device buffer m_dAllOverlappingPairs, sized for
//    m_maxHandles * m_maxPairsPerBody pairs, and zero-fills it through the
//    host array m_hAllOverlappingPairs.
//
// The buffer-creation error is checked before the buffer is used, and a
// single byte count (computed in size_t, based on sizeof(MyUint2)) is used
// for the allocation, the memset and the upload so the three can never
// disagree.
btGridBroadphaseCl::btGridBroadphaseCl(	btOverlappingPairCache* overlappingPairCache,
							const btVector3& cellSize,
							int gridSizeX, int gridSizeY, int gridSizeZ,
							int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
							btScalar maxSmallProxySize,
							int maxSmallProxiesPerCell,
							cl_context context,
							cl_device_id device,
							cl_command_queue queue,
							adl::DeviceCL* deviceCL)
:bt3dGridBroadphaseOCL(overlappingPairCache,cellSize,
			gridSizeX, gridSizeY, gridSizeZ,
			maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy,
			maxSmallProxySize,maxSmallProxiesPerCell,
			context,device,queue,deviceCL)
{
	m_computeAabbKernel = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"computeAabb","",spComputeAabbSource);

	m_countOverlappingPairs = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"countOverlappingpairs","",spComputeAabbSource);

	m_squeezePairCaches = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"squeezePairCaches","",spComputeAabbSource);

	m_aabbConstBuffer = new adl::Buffer<MyAabbConstData >(m_deviceCL,1,adl::BufferBase::BUFFER_CONST);

	// One pair slot (MyUint2) per handle and per allowed pair of that handle.
	const size_t memSize = sizeof(MyUint2) * m_maxHandles * m_maxPairsPerBody;

	cl_int ciErrNum = CL_SUCCESS;
	m_dAllOverlappingPairs = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	// Fail here, before the (possibly invalid) buffer is written to.
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memset(m_hAllOverlappingPairs, 0x00, memSize);
	copyArrayToDevice(m_dAllOverlappingPairs, m_hAllOverlappingPairs, memSize);
}
|
||||
|
||||
// Releases the resources created by the constructor of this class; the base
// class destructor handles everything it owns itself.
btGridBroadphaseCl::~btGridBroadphaseCl()
{
	// Device buffer created with clCreateBuffer in the constructor.
	clReleaseMemObject(m_dAllOverlappingPairs);

	// Adl constant buffer allocated with new in the constructor.
	delete m_aabbConstBuffer;
}
|
||||
|
||||
|
||||
|
||||
// Deliberately does nothing.
//
// The previous body started with an unconditional return, so the handle-index
// bookkeeping and the disabled bt3dGridBroadphaseOCL::prepareAABB() call
// that followed it were unreachable; that dead code has been removed.
// NOTE(review): presumably the AABBs are filled in on the device by a kernel
// before calculateOverlappingPairs() runs - confirm against the caller.
void btGridBroadphaseCl::prepareAABB(float* positions, int numObjects)
{
	(void)positions;
	(void)numObjects;
}
|
||||
void btGridBroadphaseCl::calcHashAABB()
|
||||
{
|
||||
bt3dGridBroadphaseOCL::calcHashAABB();
|
||||
}
|
||||
|
||||
|
||||
void btGridBroadphaseCl::calculateOverlappingPairs(float* positions, int numObjects)
|
||||
{
|
||||
btDispatcher* dispatcher=0;
|
||||
|
||||
// update constants
|
||||
{
|
||||
BT_PROFILE("setParameters");
|
||||
setParameters(&m_params);
|
||||
}
|
||||
|
||||
// prepare AABB array
|
||||
{
|
||||
BT_PROFILE("prepareAABB");
|
||||
prepareAABB(positions, numObjects);
|
||||
}
|
||||
// calculate hash
|
||||
{
|
||||
BT_PROFILE("calcHashAABB");
|
||||
calcHashAABB();
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("sortHash");
|
||||
// sort bodies based on hash
|
||||
sortHash();
|
||||
}
|
||||
|
||||
// find start of each cell
|
||||
{
|
||||
BT_PROFILE("findCellStart");
|
||||
findCellStart();
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("findOverlappingPairs");
|
||||
// findOverlappingPairs (small/small)
|
||||
findOverlappingPairs();
|
||||
}
|
||||
|
||||
// add pairs to CPU cache
|
||||
{
|
||||
BT_PROFILE("computePairCacheChanges");
|
||||
#if 0
|
||||
computePairCacheChanges();
|
||||
#else
|
||||
int ciErrNum=0;
|
||||
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 0, sizeof(int), (void*)&numObjects);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
|
||||
|
||||
size_t localWorkSize=64;
|
||||
size_t numWorkItems = localWorkSize*((numObjects+ (localWorkSize)) / localWorkSize);
|
||||
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_countOverlappingPairs->m_kernel, 1, NULL, &numWorkItems, &localWorkSize, 0,0,0 );
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
ciErrNum = clFlush(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
{
|
||||
BT_PROFILE("scanOverlappingPairBuff");
|
||||
scanOverlappingPairBuff(false);
|
||||
}
|
||||
{
|
||||
BT_PROFILE("squeezeOverlappingPairBuff");
|
||||
//#define FORCE_CPU
|
||||
#ifdef FORCE_CPU
|
||||
bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff();
|
||||
copyArrayToDevice(m_dPairsChangedXY, m_hPairsChangedXY, sizeof( MyUint2) * m_numPrefixSum); //gSum
|
||||
#else
|
||||
//squeezeOverlappingPairBuff();
|
||||
int ciErrNum = 0;
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 0, sizeof(int), (void*)&numObjects);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAllOverlappingPairs);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 5, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
|
||||
size_t workGroupSize = 64;
|
||||
size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);
|
||||
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_squeezePairCaches->m_kernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0 );
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
|
||||
// copyArrayFromDevice(m_hAllOverlappingPairs, m_dAllOverlappingPairs, sizeof(unsigned int) * m_numPrefixSum*2); //gSum
|
||||
// clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Roman Ponomarev, Erwin Coumans
|
||||
|
||||
#ifndef GRID_BROADPHASE_CL_H
|
||||
#define GRID_BROADPHASE_CL_H
|
||||
|
||||
#include "../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h"
|
||||
|
||||
#include "Adl/Adl.h"
|
||||
#include "Adl/AdlKernel.h"
|
||||
|
||||
|
||||
// Constant data handed to the computeAabb kernel. The member order matches
// MyAabbConstDataCL in computeAabbKernelOCL.cl and must not be changed.
struct MyAabbConstData
{
	int	bla;		// not read by any code visible in this file set
	int	numElem;	// the kernel skips work-items with an id >= numElem
};
|
||||
|
||||
|
||||
|
||||
class btGridBroadphaseCl : public bt3dGridBroadphaseOCL
|
||||
{
|
||||
protected:
|
||||
|
||||
adl::Kernel* m_computeAabbKernel;
|
||||
adl::Kernel* m_countOverlappingPairs;
|
||||
adl::Kernel* m_squeezePairCaches;
|
||||
|
||||
|
||||
adl::Buffer<MyAabbConstData>* m_aabbConstBuffer;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
cl_mem m_dAllOverlappingPairs;
|
||||
|
||||
|
||||
btGridBroadphaseCl( btOverlappingPairCache* overlappingPairCache,
|
||||
const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxSmallProxiesPerCell = 4,
|
||||
cl_context context = NULL,
|
||||
cl_device_id device = NULL,
|
||||
cl_command_queue queue = NULL,
|
||||
adl::DeviceCL* deviceCL=0
|
||||
);
|
||||
|
||||
virtual void prepareAABB(float* positions, int numObjects);
|
||||
virtual void calcHashAABB();
|
||||
|
||||
void calculateOverlappingPairs(float* positions, int numObjects);
|
||||
|
||||
virtual ~btGridBroadphaseCl();
|
||||
|
||||
};
|
||||
|
||||
#endif //GRID_BROADPHASE_CL_H
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
// Constant data passed by value to computeAabb.
// Only numElem is read by the kernels in this file.
typedef struct
{
	int bla;
	int numElem;
} MyAabbConstDataCL;
|
||||
|
||||
// Axis aligned bounding box: min corner plus an index followed by max corner
// plus an index.
typedef struct
{
	// min corner
	float minfx;
	float minfy;
	float minfz;
	unsigned int index0;

	// max corner
	float maxfx;
	float maxfy;
	float maxfz;
	unsigned int index1;
} btAabbCL;
|
||||
|
||||
|
||||
// Writes a 2x2x2 box centred on positions[i] into aabbs[i] for every i below
// cb.numElem (one work-item per element). Both index fields of the box are
// set to i.
__kernel void computeAabb( __global btAabbCL* aabbs,__global float4* positions, MyAabbConstDataCL cb)
{
	const int nodeID = get_global_id(0);

	// Work-items beyond the last element have nothing to do.
	if (nodeID >= cb.numElem)
		return;

	const float4 center = positions[nodeID];
	__global btAabbCL* box = &aabbs[nodeID];

	box->minfx = center.x -1.f;
	box->minfy = center.y -1.f;
	box->minfz = center.z -1.f;
	box->index0 = nodeID;

	box->maxfx = center.x +1.f;
	box->maxfy = center.y +1.f;
	box->maxfz = center.z +1.f;
	box->index1 = nodeID;
}
|
||||
|
||||
|
||||
// Counts, for every object, how many entries of its pair cache carry at least
// one of the flag bits 0x60000000 (either new or existing pairs; old
// non-overlapping pairs are ignored) and stores the count in
// pPairScan[index+1].
//
// pAABB holds two float4 per object; the w component of the first one
// carries the handle index as raw integer bits.
// pPairBuffStartCurr[handle] is (start, count) of that handle inside
// pPairBuff.
__kernel void countOverlappingpairs(	int numObjects,
	__global int* pPairBuff,
	__global int2* pPairBuffStartCurr,
	__global int* pPairScan,
	__global float4* pAABB )
{
	const int index = get_global_id(0);
	if (index >= numObjects)
		return;

	const int handleIndex = as_int(pAABB[index * 2].w);
	const int2 range = pPairBuffStartCurr[handleIndex];

	int flagged = 0;
	for (int k = 0; k < range.y; k++)
	{
		if (pPairBuff[range.x + k] & 0x60000000)
		{
			flagged++;
		}
	}
	pPairScan[index+1] = flagged;
}
|
||||
|
||||
|
||||
// Compacts the pair cache of every object and emits its flagged pairs.
//
// For each cache entry of the object that carries at least one of the flag
// bits 0x60000000:
//   - the pair (handleIndex, entry without the flag bits) is appended to
//     pPairOut, starting at offset pPairScan[index+1];
//   - the entry, with the flag bits cleared, is moved to the front of the
//     cache of this object (in-place compaction).
// Entries without either flag bit are dropped. Finally the count stored in
// pPairBuffStartCurr[handle] is replaced by the number of kept entries; the
// start offset is unchanged.
__kernel void squeezePairCaches(	int numObjects,
	__global int* pPairBuff,
	__global int2* pPairBuffStartCurr,
	__global int* pPairScan,
	__global int2* pPairOut,
	__global float4* pAABB )
{
	const int index = get_global_id(0);
	if (index >= numObjects)
		return;

	// The handle index is stored as raw integer bits in w of the min corner.
	const int handleIndex = as_int(pAABB[index * 2].w);
	const int2 range = pPairBuffStartCurr[handleIndex];
	const int start = range.x;
	const int count = range.y;

	__global int* cache = pPairBuff + start;
	__global int2* pOut = pPairOut + pPairScan[index+1];

	int kept = 0;
	for (int k = 0; k < count; k++)
	{
		const int entry = cache[k];
		if (entry & 0x60000000)
		{
			const int partner = entry & (~0x60000000);
			*pOut = (int2)(handleIndex, partner);
			pOut++;
			// kept never exceeds k, so this never overwrites an unread entry.
			cache[kept] = partner;
			kept++;
		}
	}

	pPairBuffStartCurr[handleIndex] = (int2)(start, kept);
}
|
||||
);
|
||||
@@ -0,0 +1,204 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Roman Ponomarev, Erwin Coumans
|
||||
|
||||
#include "findPairsOpenCL.h"
|
||||
#include "../basic_initialize/btOpenCLUtils.h"
|
||||
|
||||
#define MSTRINGIFY(A) #A
|
||||
static char* broadphaseKernelString =
|
||||
#include "broadphaseKernel.cl"
|
||||
|
||||
#define GRID_BROADPHASE_PATH "..\\..\\opencl\\broadphase_benchmark\\broadphaseKernel.cl"
|
||||
|
||||
|
||||
|
||||
|
||||
// Stores the OpenCL context, device and queue in fpio, compiles the
// broadphase kernel source once and creates the six kernels used by the
// helper functions in this file.
//
// maxHandles and maxPairsPerBody are accepted for interface compatibility
// but are not used here.
//
// The error code is initialised and checked after every step, and the
// program object is released once all kernels exist. The kernels keep the
// program alive, so this only drops the reference that nothing else in this
// file would ever release.
void initFindPairs(btFindPairsIO& fpio,cl_context cxMainContext, cl_device_id device, cl_command_queue commandQueue, int maxHandles, int maxPairsPerBody)
{
	(void)maxHandles;
	(void)maxPairsPerBody;

	fpio.m_mainContext = cxMainContext;
	fpio.m_cqCommandQue = commandQueue;
	fpio.m_device = device;

	cl_int pErrNum = CL_SUCCESS;
	cl_program prog = btOpenCLUtils::compileCLProgramFromString(cxMainContext, device, broadphaseKernelString, &pErrNum ,"",GRID_BROADPHASE_PATH);
	oclCHECKERROR(pErrNum, CL_SUCCESS);

	fpio.m_broadphaseBruteForceKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "broadphaseKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);
	fpio.m_initializeGpuAabbsKernelSimple = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "initializeGpuAabbsSimple" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);
	fpio.m_initializeGpuAabbsKernelFull = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "initializeGpuAabbsFull" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);

	fpio.m_broadphaseColorKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "broadphaseColorKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);

	fpio.m_setupBodiesKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "setupBodiesKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);
	fpio.m_copyVelocitiesKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "copyVelocitiesKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);

	if (prog)
	{
		clReleaseProgram(prog);
	}
}
|
||||
|
||||
// Enqueues the brute-force broadphase kernel over all fpio.m_numObjects
// objects.
//
// The global work size is rounded up to a multiple of the work-group size
// (64): OpenCL 1.x rejects an NDRange whose global size is not a multiple of
// the explicit local size, so launching exactly m_numObjects items failed
// for every object count that is not a multiple of 64. The kernel ignores
// work-items whose id is >= numNodes, so the surplus items are harmless.
// Nothing is enqueued when there are no objects (a global size of zero is an
// error as well).
void findPairsOpenCLBruteForce(btFindPairsIO& fpio)
{
	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	if (numObjects <= 0)
	{
		return;
	}

	cl_int ciErrNum = CL_SUCCESS;

	ciErrNum = clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 1, sizeof(int), &numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	size_t workGroupSize = 64;
	// Smallest multiple of workGroupSize that is >= numObjects.
	size_t numWorkItems = workGroupSize*(((size_t)numObjects + workGroupSize - 1) / workGroupSize);

	ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_broadphaseBruteForceKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
|
||||
|
||||
// Enqueues initializeGpuAabbsFull, which fills fpio.m_dAABB from the object
// buffer, the given bodies buffer and the per-shape local AABBs in
// fpio.m_dlocalShapeAABB.
//
// Every clSetKernelArg result is checked: a failed call would leave the
// previously bound value in place and the kernel would run on stale
// arguments without any error being reported.
void setupGpuAabbsFull(btFindPairsIO& fpio, cl_mem bodies)
{
	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;
	cl_kernel kernel = fpio.m_initializeGpuAabbsKernelFull;
	cl_int ciErrNum = CL_SUCCESS;

	ciErrNum = clSetKernelArg(kernel, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 1, sizeof(int), &numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*)&bodies);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*)&fpio.m_dlocalShapeAABB);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*)&fpio.m_dAABB);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	// Multiple of the work-group size that is always larger than numObjects;
	// the kernel ignores the surplus work-items.
	size_t workGroupSize = 64;
	size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);

	ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, kernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
|
||||
|
||||
// Enqueues initializeGpuAabbsSimple, which fills fpio.m_dAABB from the
// positions and orientations stored in fpio.m_clObjectsBuffer.
//
// Every clSetKernelArg result is checked: a failed call would leave the
// previously bound value in place and the kernel would run on stale
// arguments without any error being reported.
void setupGpuAabbsSimple(btFindPairsIO& fpio)
{
	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;
	cl_kernel kernel = fpio.m_initializeGpuAabbsKernelSimple;
	cl_int ciErrNum = CL_SUCCESS;

	ciErrNum = clSetKernelArg(kernel, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 1, sizeof(int), &numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*)&fpio.m_dAABB);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	// Multiple of the work-group size that is always larger than numObjects;
	// the kernel ignores the surplus work-items.
	size_t workGroupSize = 64;
	size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);

	ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, kernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
|
||||
|
||||
|
||||
// Binds the object buffer, the velocity buffers, the bodies and the body
// inertias to setupBodiesKernel and enqueues it over all objects. The kernel
// is not enqueued when fpio.m_numObjects is zero; the arguments are bound
// either way.
//
// Every clSetKernelArg result is checked: a failed call would leave the
// previously bound value in place and the kernel would run on stale
// arguments without any error being reported.
void setupBodies(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias)
{
	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;
	cl_kernel kernel = fpio.m_setupBodiesKernel;
	cl_int ciErrNum = CL_SUCCESS;

	ciErrNum = clSetKernelArg(kernel, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 1, sizeof(int), &numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	ciErrNum = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*)&linVelMem);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*)&angVelMem);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*)&bodies);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&bodyInertias);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (numObjects)
	{
		// Multiple of the work-group size that is always larger than
		// numObjects; the kernel ignores the surplus work-items.
		size_t workGroupSize = 64;
		size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);

		ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, kernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
}
|
||||
|
||||
|
||||
// Binds the object buffer, the velocity buffers, the bodies and the body
// inertias to copyVelocitiesKernel and enqueues it over all objects. The
// kernel is not enqueued when fpio.m_numObjects is zero; the arguments are
// bound either way.
//
// Every clSetKernelArg result is checked: a failed call would leave the
// previously bound value in place and the kernel would run on stale
// arguments without any error being reported.
void copyBodyVelocities(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias)
{
	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;
	cl_kernel kernel = fpio.m_copyVelocitiesKernel;
	cl_int ciErrNum = CL_SUCCESS;

	ciErrNum = clSetKernelArg(kernel, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 1, sizeof(int), &numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	ciErrNum = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*)&linVelMem);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*)&angVelMem);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*)&bodies);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&bodyInertias);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (numObjects)
	{
		// Multiple of the work-group size that is always larger than
		// numObjects; the kernel ignores the surplus work-items.
		size_t workGroupSize = 64;
		size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);

		ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, kernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
}
|
||||
|
||||
// Binds the object buffer and the overlapping pair list to
// broadphaseColorKernel and enqueues it with one work-item per pair. The
// kernel is not enqueued when fpio.m_numOverlap is zero; the arguments are
// bound either way.
//
// Every clSetKernelArg result is checked: a failed call would leave the
// previously bound value in place and the kernel would run on stale
// arguments without any error being reported.
void colorPairsOpenCL(btFindPairsIO& fpio)
{
	int offset = fpio.m_positionOffset;
	cl_kernel kernel = fpio.m_broadphaseColorKernel;
	cl_int ciErrNum = CL_SUCCESS;

	ciErrNum = clSetKernelArg(kernel, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 1, sizeof(int), &fpio.m_numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*)&fpio.m_dAllOverlappingPairs);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(kernel, 4, sizeof(int), &fpio.m_numOverlap);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (fpio.m_numOverlap)
	{
		// Multiple of the work-group size that is always larger than the
		// pair count; the kernel ignores the surplus work-items.
		size_t workGroupSize = 64;
		size_t numWorkItems = workGroupSize*((fpio.m_numOverlap+ (workGroupSize)) / workGroupSize);

		ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, kernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
}
|
||||
|
||||
|
||||
|
||||
// Releases the six kernels created by initFindPairs(). The other members of
// fpio (context, queue, buffers) are left untouched.
void releaseFindPairs(btFindPairsIO& fpio)
{
	const cl_kernel kernels[] =
	{
		fpio.m_initializeGpuAabbsKernelSimple,
		fpio.m_initializeGpuAabbsKernelFull,
		fpio.m_broadphaseColorKernel,
		fpio.m_broadphaseBruteForceKernel,
		fpio.m_setupBodiesKernel,
		fpio.m_copyVelocitiesKernel
	};
	const int numKernels = (int)(sizeof(kernels) / sizeof(kernels[0]));

	for (int i = 0; i < numKernels; i++)
	{
		clReleaseKernel(kernels[i]);
	}
}
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Roman Ponomarev, Erwin Coumans
|
||||
|
||||
#ifndef FIND_PAIRS_H
|
||||
#define FIND_PAIRS_H
|
||||
|
||||
#include "../basic_initialize/btOpenCLInclude.h"
|
||||
|
||||
struct btKernelInfo
|
||||
{
|
||||
int m_Id;
|
||||
cl_kernel m_kernel;
|
||||
char* m_name;
|
||||
int m_workgroupSize;
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct btFindPairsIO
|
||||
{
|
||||
int m_numObjects;
|
||||
|
||||
cl_mem m_clObjectsBuffer; //for memory layout details see main.cpp (todo, make it flexible)
|
||||
int m_positionOffset;//offset in m_clObjectsBuffer where position array starts
|
||||
|
||||
cl_command_queue m_cqCommandQue;
|
||||
cl_kernel m_initializeGpuAabbsKernelSimple;
|
||||
cl_kernel m_initializeGpuAabbsKernelFull;
|
||||
cl_kernel m_broadphaseColorKernel;
|
||||
cl_kernel m_broadphaseBruteForceKernel;
|
||||
|
||||
cl_kernel m_setupBodiesKernel;
|
||||
cl_kernel m_copyVelocitiesKernel;
|
||||
|
||||
cl_context m_mainContext;
|
||||
cl_device_id m_device;
|
||||
|
||||
cl_kernel m_calcHashAabbKernel;
|
||||
cl_kernel m_clearCellStartKernel;
|
||||
cl_kernel m_findCellStartKernel;
|
||||
cl_kernel m_findOverlappingPairsKernel;
|
||||
cl_kernel m_computePairChangeKernel;
|
||||
cl_kernel m_squeezePairBuffKernel;
|
||||
|
||||
|
||||
cl_mem m_dAllOverlappingPairs;
|
||||
int m_numOverlap;
|
||||
|
||||
cl_mem m_dBpParams;
|
||||
cl_mem m_dBodiesHash;
|
||||
cl_mem m_dCellStart;
|
||||
cl_mem m_dPairBuff;
|
||||
cl_mem m_dPairBuffStartCurr;
|
||||
cl_mem m_dlocalShapeAABB;
|
||||
cl_mem m_dAABB;
|
||||
cl_mem m_dPairScan;
|
||||
cl_mem m_dPairOut;
|
||||
};
|
||||
|
||||
|
||||
void initFindPairs(btFindPairsIO& fpio,cl_context cxMainContext, cl_device_id device, cl_command_queue commandQueue, int maxHandles,int maxPairsPerBody = 16);
|
||||
|
||||
void findPairsOpenCLBruteForce(btFindPairsIO& fpio);
|
||||
|
||||
void setupGpuAabbsSimple(btFindPairsIO& fpio);
|
||||
|
||||
void setupGpuAabbsFull(btFindPairsIO& fpio, cl_mem bodies);
|
||||
|
||||
|
||||
void colorPairsOpenCL(btFindPairsIO& fpio);
|
||||
|
||||
void setupBodies(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias);
|
||||
void copyBodyVelocities(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias);
|
||||
|
||||
void releaseFindPairs(btFindPairsIO& fpio);
|
||||
|
||||
#endif //FIND_PAIRS_H
|
||||
@@ -0,0 +1,116 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
// Quaternion (Hamilton) product q1*q2. The vector part lives in .xyz and the
// scalar part in .w.
float4 quatMult(float4 q1, float4 q2)
{
	float4 product;
	// scalar part
	product.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z;
	// vector part
	product.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;
	product.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;
	product.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;
	return product;
}
|
||||
|
||||
// Returns q scaled to unit length. When the length is not strictly positive
// the identity quaternion (0 0 0 1) is returned instead.
float4 quatNorm(float4 q)
{
	float magnitude = native_sqrt(dot(q, q));
	if (!(magnitude > 0.f))
	{
		// cannot normalise: fall back to the identity rotation
		q.x = 0.f;
		q.y = 0.f;
		q.z = 0.f;
		q.w = 1.f;
		return q;
	}
	q *= 1.f / magnitude;
	return q;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Advances one body per work-item by a fixed time step of 0.0166666.
//
// g_vertexBuffer is indexed in float4 units: the position of body nodeID is at
// [startOffset/4 + nodeID] and its orientation quaternion numNodes entries
// further on (startOffset is presumably expressed in floats -- confirm).
// linVel / pAngVel hold one float4 velocity per body. pAngVel is damped in
// place by 0.99 per call. pBodyTimes is not used by this kernel.
__kernel void
integrateTransformsKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
						  __global float4 *linVel,
						  __global float4 *pAngVel,
						  __global float* pBodyTimes)
{
	int nodeID = get_global_id(0);

	float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);
	float timeStep = 0.0166666f;

	if( nodeID < numNodes )
	{
		int positionIndex = nodeID + startOffset/4;
		int orientationIndex = positionIndex + numNodes;

		//add some hardcoded angular damping (single load/store instead of three global read-modify-writes)
		float4 angvel = pAngVel[nodeID];
		angvel.x *= 0.99f;
		angvel.y *= 0.99f;
		angvel.z *= 0.99f;
		pAngVel[nodeID] = angvel;

		// NOTE(review): dot() also includes angvel.w; this is only the angular speed if .w is zero -- confirm
		float fAngle = native_sqrt(dot(angvel, angvel));
		//limit the angular motion
		if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)
		{
			fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;
		}

		// axis = angvel * sin(0.5*fAngle*timeStep)/fAngle
		float4 axis;
		if(fAngle < 0.001f)
		{
			// small angle: Taylor expansion of the expression above, 0.5*t - t^3*fAngle^2/48
			axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);
		}
		else
		{
			axis = angvel * ( native_sin(0.5f * fAngle * timeStep) / fAngle);
		}

		// incremental rotation for this step, applied on the left of the current orientation
		float4 dorn = axis;
		dorn.w = native_cos(fAngle * timeStep * 0.5f);
		float4 orn0 = g_vertexBuffer[orientationIndex];
		float4 predictedOrn = quatMult(dorn, orn0);
		predictedOrn = quatNorm(predictedOrn);
		g_vertexBuffer[orientationIndex]=predictedOrn;

		//linear velocity
		g_vertexBuffer[positionIndex] += linVel[nodeID] * timeStep;
	}
}
|
||||
|
||||
|
||||
// Moves each body along a closed-form sine/cosine path.
//
// Per work-item: pBodyTimes[nodeID] is advanced by a fixed increment and the
// position at g_vertexBuffer[startOffset/4 + nodeID] has its x/y/z overwritten
// from that time value (w is left as read). linVel and pAngVel are not used.
__kernel void
sineWaveKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
						  __global float4 *linVel,
						  __global float4 *pAngVel,
						  __global float* pBodyTimes)
{
	int nodeID = get_global_id(0);
	// float literal: an unsuffixed constant is a double, which not every device supports
	float timeStepPos = 0.000166666f;
	float mAmplitude = 166.f;

	if( nodeID < numNodes )
	{
		// advance the per-body clock once and keep it in a register
		float bodyTime = pBodyTimes[nodeID] + timeStepPos;
		pBodyTimes[nodeID] = bodyTime;

		int positionIndex = nodeID + startOffset/4;
		float4 position = g_vertexBuffer[positionIndex];
		position.x = native_cos(bodyTime*2.17f)*mAmplitude + native_sin(bodyTime)*mAmplitude*0.5f;
		// NOTE(review): unlike x and z the amplitude is inside native_sin() here; kept as in the original -- confirm intent
		position.y = native_cos(bodyTime*1.38f)*mAmplitude + native_sin(bodyTime*mAmplitude);
		position.z = native_cos(bodyTime*2.17f)*mAmplitude + native_sin(bodyTime*0.777f)*mAmplitude;
		g_vertexBuffer[positionIndex] = position;
	}
}
|
||||
|
||||
|
||||
|
||||
);
|
||||
1565
Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/main.cpp
Normal file
1565
Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/main.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,5 @@
|
||||
|
||||
include "AMD"
|
||||
include "Intel"
|
||||
include "NVIDIA"
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
|
||||
-- Premake project for the global atomics test, AMD OpenCL flavour.
-- The project is only emitted when findOpenCL_AMD() returns a true value.
hasCL = findOpenCL_AMD()

if hasCL then

	project("OpenCL_global_atomics_AMD")

	initOpenCL_AMD()

	language("C++")

	kind("ConsoleApp")
	targetdir("../../../bin")

--	includedirs {"..","../../../../include/gpu_research"}

	files({
		"../main.cpp",
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h"
	})

end
|
||||
@@ -0,0 +1,36 @@
|
||||
static const char* globalAtomicsKernelString= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"//OpenCL 1.1 has atomic_inc build-in (no extension needed)\n"
|
||||
"//see http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/atomic_inc.html\n"
|
||||
"__kernel void globalAtomicKernelOpenCL1_1( volatile __global int* counter)\n"
|
||||
"{\n"
|
||||
" atomic_inc(counter);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"//OpenCL 1.1 atomic device counters extension, usually faster on current AMD hardware\n"
|
||||
"//http://www.khronos.org/registry/cl/extensions/ext/cl_ext_atomic_counters_32.txt\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"__kernel void counterAtomicKernelExt( counter32_t counter)\n"
|
||||
"{\n"
|
||||
" atomic_inc(counter);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"//OpenCL 1.0 optional extension, using atom_inc\n"
|
||||
"//see http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/cl_khr_global_int32_base_atomics.html\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable //atomic_inc\n"
|
||||
"__kernel void globalAtomicKernelExt( __global int* counter)\n"
|
||||
"{\n"
|
||||
" atom_inc(counter);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void globalAtomicKernelCounters32Broken( __global int* counter)\n"
|
||||
"{\n"
|
||||
" (*counter)++;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
;
|
||||
@@ -0,0 +1,34 @@
|
||||
|
||||
|
||||
|
||||
|
||||
//OpenCL 1.1 has atomic_inc build-in (no extension needed)
|
||||
//see http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/atomic_inc.html
|
||||
__kernel void globalAtomicKernelOpenCL1_1( volatile __global int* counter)
|
||||
{
|
||||
atomic_inc(counter);
|
||||
}
|
||||
|
||||
//OpenCL 1.1 atomic device counters extension, usually faster on current AMD hardware
|
||||
//http://www.khronos.org/registry/cl/extensions/ext/cl_ext_atomic_counters_32.txt
|
||||
#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
|
||||
__kernel void counterAtomicKernelExt( counter32_t counter)
|
||||
{
|
||||
atomic_inc(counter);
|
||||
}
|
||||
|
||||
|
||||
//OpenCL 1.0 optional extension, using atom_inc
|
||||
//see http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/cl_khr_global_int32_base_atomics.html
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable //atomic_inc
|
||||
__kernel void globalAtomicKernelExt( __global int* counter)
|
||||
{
|
||||
atom_inc(counter);
|
||||
}
|
||||
|
||||
|
||||
__kernel void globalAtomicKernelCounters32Broken( __global int* counter)
|
||||
{
|
||||
(*counter)++;
|
||||
}
|
||||
|
||||
201
Extras/RigidBodyGpuPipeline/opencl/global_atomics/main.cpp
Normal file
201
Extras/RigidBodyGpuPipeline/opencl/global_atomics/main.cpp
Normal file
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
///original author: Erwin Coumans
|
||||
|
||||
#include "../basic_initialize/btOpenCLUtils.h"
|
||||
#include <stdio.h>
|
||||
|
||||
cl_context g_cxMainContext;
|
||||
cl_command_queue g_cqCommandQue;
|
||||
cl_kernel g_atomicsKernel;
|
||||
static const size_t workGroupSize = 128;//todo figure out an appropriate workgroup size suitable for the OpenCL platform/context/device/kernel
|
||||
#define NUM_OBJECTS 1024
|
||||
|
||||
#include "globalAtomicsKernel.h"
|
||||
|
||||
|
||||
char * findAndReplace( char const * const original, char const * const pattern, char const * const replacement);
|
||||
|
||||
|
||||
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
|
||||
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int ciErrNum = 0;
|
||||
|
||||
printf("press a key to start\n");
|
||||
getchar();
|
||||
|
||||
const char* vendorSDK = btOpenCLUtils::getSdkVendorName();
|
||||
printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK);
|
||||
|
||||
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;//CL_DEVICE_TYPE_ALL
|
||||
|
||||
void* glCtx=0;
|
||||
void* glDC = 0;
|
||||
printf("Initialize OpenCL using btOpenCLUtils::createContextFromType for CL_DEVICE_TYPE_GPU\n");
|
||||
g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);
|
||||
|
||||
if (numDev>0)
|
||||
{
|
||||
int deviceIndex=0;
|
||||
|
||||
cl_device_id device;
|
||||
device = btOpenCLUtils::getDevice(g_cxMainContext,deviceIndex);
|
||||
btOpenCLDeviceInfo clInfo;
|
||||
btOpenCLUtils::getDeviceInfo(device,clInfo);
|
||||
btOpenCLUtils::printDeviceInfo(device);
|
||||
|
||||
|
||||
const char* globalAtomicsKernelStringPatched = globalAtomicsKernelString;
|
||||
if (!strstr(clInfo.m_deviceExtensions,"cl_ext_atomic_counters_32"))
|
||||
{
|
||||
globalAtomicsKernelStringPatched = findAndReplace(globalAtomicsKernelString,"counter32_t", "volatile __global int*");
|
||||
}
|
||||
|
||||
|
||||
|
||||
// create a command-queue
|
||||
g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, device, 0, &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
cl_mem counterBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, sizeof(int), NULL, &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
char* kernelMethods[] =
|
||||
{
|
||||
"globalAtomicKernelOpenCL1_1",
|
||||
"counterAtomicKernelExt",
|
||||
"globalAtomicKernelExt",
|
||||
"globalAtomicKernelCounters32Broken"
|
||||
};
|
||||
int numKernelMethods = sizeof(kernelMethods)/sizeof(char*);
|
||||
|
||||
for (int i=0;i<numKernelMethods;i++)
|
||||
{
|
||||
int myCounter = 0;
|
||||
|
||||
//write to counterBuffer
|
||||
int deviceOffset=0;
|
||||
int hostOffset=0;
|
||||
|
||||
ciErrNum = clEnqueueWriteBuffer(g_cqCommandQue, counterBuffer,CL_FALSE, deviceOffset, sizeof(int), &myCounter, 0, NULL, NULL);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
g_atomicsKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext,device,globalAtomicsKernelStringPatched,kernelMethods[i], &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
|
||||
|
||||
|
||||
ciErrNum = clSetKernelArg(g_atomicsKernel, 0, sizeof(cl_mem),(void*)&counterBuffer);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
size_t numWorkItems = workGroupSize*((NUM_OBJECTS + (workGroupSize-1)) / workGroupSize);
|
||||
ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_atomicsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
clFinish(g_cqCommandQue);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
//read from counterBuffer
|
||||
ciErrNum = clEnqueueReadBuffer(g_cqCommandQue, counterBuffer, CL_TRUE, deviceOffset, sizeof(int), &myCounter, 0, NULL, NULL);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
if (myCounter != NUM_OBJECTS)
|
||||
{
|
||||
printf("%s is broken, expected %d got %d\n",kernelMethods[i],NUM_OBJECTS,myCounter);
|
||||
} else
|
||||
{
|
||||
printf("%s success, got %d\n",kernelMethods[i],myCounter);
|
||||
}
|
||||
}
|
||||
|
||||
clReleaseCommandQueue(g_cqCommandQue);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
|
||||
clReleaseContext(g_cxMainContext);
|
||||
|
||||
printf("press a key to end\n");
|
||||
getchar();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
#pragma warning( push )
|
||||
#pragma warning( disable : 4996 )
|
||||
#endif //_WIN32
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * Returns a newly malloc'ed copy of `original` in which every non-overlapping
 * occurrence of `pattern` (searched left to right) is replaced by
 * `replacement`. The caller owns the result and must free() it.
 *
 * Returns NULL if any argument is NULL or if the allocation fails.
 * An empty pattern matches nothing: the result is a plain copy of `original`.
 */
char * findAndReplace(
	char const * const original,
	char const * const pattern,
	char const * const replacement
) {
	size_t replen;
	size_t patlen;
	size_t orilen;
	size_t patcnt = 0;
	size_t retlen;
	const char * oriptr;
	const char * patloc;
	char * returned;
	char * retptr;

	if (original == NULL || pattern == NULL || replacement == NULL)
	{
		return NULL;
	}

	replen = strlen(replacement);
	patlen = strlen(pattern);
	orilen = strlen(original);

	// find how many times the pattern occurs in the original string.
	// strstr() matches an empty pattern without consuming input, which would
	// never terminate, so an empty pattern is treated as "no occurrences".
	if (patlen != 0)
	{
		for (oriptr = original; (patloc = strstr(oriptr, pattern)) != NULL; oriptr = patloc + patlen)
		{
			patcnt++;
		}
	}

	// allocate memory for the new string; subtract before adding so the
	// intermediate value never wraps when the replacement is shorter
	retlen = orilen - patcnt * patlen + patcnt * replen;
	returned = (char *) malloc( sizeof(char) * (retlen + 1) );
	if (returned == NULL)
	{
		return NULL;
	}

	// copy the original string, replacing all the instances of the pattern
	retptr = returned;
	oriptr = original;
	if (patlen != 0)
	{
		while ((patloc = strstr(oriptr, pattern)) != NULL)
		{
			size_t const skplen = (size_t)(patloc - oriptr);
			// copy the section until the occurrence of the pattern
			memcpy(retptr, oriptr, skplen);
			retptr += skplen;
			// copy the replacement
			memcpy(retptr, replacement, replen);
			retptr += replen;
			oriptr = patloc + patlen;
		}
	}
	// copy the rest of the string, including the terminator
	strcpy(retptr, oriptr);

	return returned;
}
|
||||
|
||||
#ifdef _WIN32
|
||||
#pragma warning( pop )
|
||||
#endif //_WIN32
|
||||
@@ -0,0 +1,4 @@
|
||||
|
||||
include "AMD"
|
||||
--include "Intel"
|
||||
--include "NVIDIA"
|
||||
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env python
|
||||
import sys
|
||||
import os
|
||||
import shutil
|
||||
|
||||
def stringify(lines, symbol):
    """Return C source defining `static const char* <symbol>` from text lines.

    Each input line becomes one adjacent C string literal ending in an escaped
    newline. Backslashes and double quotes are escaped so that any kernel
    source yields a valid C string; trailing line terminators are dropped.
    """
    out = ['static const char* ' + symbol + '= \\']
    for line in lines:
        text = line.rstrip('\r\n')
        # escape backslashes first so the quotes' escapes are not doubled
        text = text.replace('\\', '\\\\').replace('"', '\\"')
        out.append('"' + text + '\\n"')
    out.append(';')
    return '\n'.join(out) + '\n'


def main(argv):
    """Command line entry point: stringify.py <input file> <symbol name>."""
    if len(argv) < 3:
        sys.stderr.write('usage: %s <kernel.cl> <symbolName>\n' % argv[0])
        return 1
    fh = open(argv[1])
    try:
        lines = fh.readlines()
    finally:
        fh.close()
    # sys.stdout.write keeps the script working under both Python 2 and 3
    sys.stdout.write(stringify(lines, argv[2]))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,5 @@
|
||||
stringify.py global_atomics.cl globalAtomicsKernelString >globalAtomicsKernel.h
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
|
||||
-- Premake project for the GPU rigid body pipeline demo, AMD OpenCL flavour.
-- The project is only emitted when findOpenCL_AMD() returns a true value.
hasCL = findOpenCL_AMD()

if hasCL then

	project("OpenCL_gpu_rigidbody_pipeline_AMD")

	initOpenCL_AMD()

	language("C++")

	kind("ConsoleApp")
	targetdir("../../../bin")

	initOpenGL()
	initGlut()
	initGlew()

	includedirs({
		"../../../rendering/BulletMath",
		"../../primitives",
		"../../../../../src"
	})

	files({
		-- demo sources
		"../main.cpp",
		"../btConvexUtility.cpp",
		"../btConvexUtility.h",
		"../btGpuNarrowPhaseAndSolver.cpp",
		"../btGpuNarrowPhaseAndSolver.h",
		"../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp",
		"../../../dynamics/basic_demo/ConvexHeightFieldShape.h",
		"../../../../../src/LinearMath/btConvexHullComputer.cpp",
		"../../../../../src/LinearMath/btConvexHullComputer.h",
		-- broadphase
		"../../broadphase_benchmark/findPairsOpenCL.cpp",
		"../../broadphase_benchmark/findPairsOpenCL.h",
		"../../broadphase_benchmark/btGridBroadphaseCL.cpp",
		"../../broadphase_benchmark/btGridBroadphaseCL.h",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		-- Bullet sources compiled directly into the demo
		"../../../../../src/LinearMath/btAlignedAllocator.cpp",
		"../../../../../src/LinearMath/btQuickprof.cpp",
		"../../../../../src/LinearMath/btQuickprof.h",
		"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		-- OpenCL / OpenGL helpers
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	})

end
|
||||
@@ -0,0 +1,91 @@
|
||||
#ifndef COMMAND_LINE_ARGS_H
|
||||
#define COMMAND_LINE_ARGS_H
|
||||
|
||||
/******************************************************************************
|
||||
* Command-line parsing
|
||||
******************************************************************************/
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
/// Minimal parser for "--key" and "--key=value" command line arguments.
///
/// Arguments that do not start with "--" are ignored. A key given without
/// '=' is stored with an empty value. When a key is repeated the last
/// occurrence wins.
class CommandLineArgs
{
protected:

	std::map<std::string, std::string> pairs;	///< key -> value, both without the leading "--"

public:

	/// Parses argv[1] .. argv[argc-1]; argv[0] is skipped.
	CommandLineArgs(int argc, char **argv)
	{
		for (int i = 1; i < argc; i++)
		{
			const std::string arg = argv[i];

			// only "--..." arguments are recognised
			if (arg.compare(0, 2, "--") != 0) {
				continue;
			}

			const std::string::size_type eq = arg.find('=');
			if (eq == std::string::npos) {
				pairs[arg.substr(2)] = "";
			} else {
				pairs[arg.substr(2, eq - 2)] = arg.substr(eq + 1);
			}
		}
	}

	/// Returns true if "--arg_name" (with or without a value) was given.
	bool CheckCmdLineFlag(const char* arg_name) const
	{
		return pairs.find(arg_name) != pairs.end();
	}

	/// Extracts the value of "--arg_name" into val using operator>>.
	/// val is left untouched when the argument was not given.
	template <typename T>
	void GetCmdLineArgument(const char *arg_name, T &val);

	/// Number of distinct "--" arguments that were parsed.
	int ParsedArgc() const
	{
		return static_cast<int>(pairs.size());
	}
};

template <typename T>
void CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val)
{
	std::map<std::string, std::string>::iterator itr = pairs.find(arg_name);
	if (itr != pairs.end()) {
		std::istringstream strstream(itr->second);
		strstream >> val;
	}
}

/// char* specialisation: val receives a malloc'ed copy of the value, which
/// the caller must free(), or NULL when the argument was not given.
/// An explicit specialisation is an ordinary function, so it has to be
/// declared inline to be defined in a header included from several
/// translation units.
template <>
inline void CommandLineArgs::GetCmdLineArgument<char*>(const char* arg_name, char* &val)
{
	std::map<std::string, std::string>::iterator itr = pairs.find(arg_name);
	if (itr != pairs.end()) {
		const std::string& s = itr->second;
		val = (char*) malloc(sizeof(char) * (s.length() + 1));
		// NOTE(review): val is NULL here if malloc fails; check before copying if that matters to callers
		strcpy(val, s.c_str());
	} else {
		val = NULL;
	}
}
|
||||
|
||||
#endif //COMMAND_LINE_ARGS_H
|
||||
@@ -0,0 +1,58 @@
|
||||
|
||||
-- Premake project for the GPU rigid body pipeline demo, Intel OpenCL flavour.
-- The project is only emitted when findOpenCL_Intel() returns a true value.
hasCL = findOpenCL_Intel()

if hasCL then

	project("OpenCL_gpu_rigidbody_pipeline_Intel")

	initOpenCL_Intel()

	language("C++")

	kind("ConsoleApp")
	targetdir("../../../bin")

	initOpenGL()
	initGlut()
	initGlew()

	includedirs({
		"../../../rendering/BulletMath",
		"../../primitives",
		"../../../../../src"
	})

	files({
		-- demo sources
		"../main.cpp",
		"../btConvexUtility.cpp",
		"../btConvexUtility.h",
		"../btGpuNarrowPhaseAndSolver.cpp",
		"../btGpuNarrowPhaseAndSolver.h",
		"../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp",
		"../../../dynamics/basic_demo/ConvexHeightFieldShape.h",
		"../../../../../src/LinearMath/btConvexHullComputer.cpp",
		"../../../../../src/LinearMath/btConvexHullComputer.h",
		-- broadphase
		"../../broadphase_benchmark/findPairsOpenCL.cpp",
		"../../broadphase_benchmark/findPairsOpenCL.h",
		"../../broadphase_benchmark/btGridBroadphaseCL.cpp",
		"../../broadphase_benchmark/btGridBroadphaseCL.h",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		-- Bullet sources compiled directly into the demo
		"../../../../../src/LinearMath/btAlignedAllocator.cpp",
		"../../../../../src/LinearMath/btQuickprof.cpp",
		"../../../../../src/LinearMath/btQuickprof.h",
		"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		-- OpenCL / OpenGL helpers
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	})

end
|
||||
@@ -0,0 +1,57 @@
|
||||
|
||||
-- Premake project for the GPU rigid body pipeline demo, NVIDIA OpenCL flavour.
-- The project is only emitted when findOpenCL_NVIDIA() returns a true value.
hasCL = findOpenCL_NVIDIA()

if hasCL then

	project("OpenCL_gpu_rigidbody_pipeline_NVIDIA")

	initOpenCL_NVIDIA()

	language("C++")

	kind("ConsoleApp")
	targetdir("../../../bin")

	initOpenGL()
	initGlut()
	initGlew()

	includedirs({
		"../../../rendering/BulletMath",
		"../../primitives",
		"../../../../../src"
	})

	files({
		-- demo sources
		"../main.cpp",
		"../btConvexUtility.cpp",
		"../btConvexUtility.h",
		"../btGpuNarrowPhaseAndSolver.cpp",
		"../btGpuNarrowPhaseAndSolver.h",
		"../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp",
		"../../../dynamics/basic_demo/ConvexHeightFieldShape.h",
		"../../../../../src/LinearMath/btConvexHullComputer.cpp",
		"../../../../../src/LinearMath/btConvexHullComputer.h",
		-- broadphase
		"../../broadphase_benchmark/findPairsOpenCL.cpp",
		"../../broadphase_benchmark/findPairsOpenCL.h",
		"../../broadphase_benchmark/btGridBroadphaseCL.cpp",
		"../../broadphase_benchmark/btGridBroadphaseCL.h",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		-- Bullet sources compiled directly into the demo
		"../../../../../src/LinearMath/btAlignedAllocator.cpp",
		"../../../../../src/LinearMath/btQuickprof.cpp",
		"../../../../../src/LinearMath/btQuickprof.h",
		"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		-- OpenCL / OpenGL helpers
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	})

end
|
||||
@@ -0,0 +1,240 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
|
||||
#include "btConvexUtility.h"
|
||||
#include "LinearMath/btConvexHullComputer.h"
|
||||
#include "LinearMath/btGrahamScan2dConvexHull.h"
|
||||
#include "LinearMath/btQuaternion.h"
|
||||
|
||||
bool btConvexUtility::initializePolyhedralFeatures(const btAlignedObjectArray<btVector3>& orgVertices, bool mergeCoplanarTriangles)
|
||||
{
|
||||
|
||||
|
||||
btConvexHullComputer conv;
|
||||
conv.compute(&orgVertices[0].getX(), sizeof(btVector3),orgVertices.size(),0.f,0.f);
|
||||
|
||||
btAlignedObjectArray<btVector3> faceNormals;
|
||||
int numFaces = conv.faces.size();
|
||||
faceNormals.resize(numFaces);
|
||||
btConvexHullComputer* convexUtil = &conv;
|
||||
|
||||
|
||||
btAlignedObjectArray<btFace> tmpFaces;
|
||||
tmpFaces.resize(numFaces);
|
||||
|
||||
int numVertices = convexUtil->vertices.size();
|
||||
m_vertices.resize(numVertices);
|
||||
for (int p=0;p<numVertices;p++)
|
||||
{
|
||||
m_vertices[p] = convexUtil->vertices[p];
|
||||
}
|
||||
|
||||
|
||||
for (int i=0;i<numFaces;i++)
|
||||
{
|
||||
int face = convexUtil->faces[i];
|
||||
//printf("face=%d\n",face);
|
||||
const btConvexHullComputer::Edge* firstEdge = &convexUtil->edges[face];
|
||||
const btConvexHullComputer::Edge* edge = firstEdge;
|
||||
|
||||
btVector3 edges[3];
|
||||
int numEdges = 0;
|
||||
//compute face normals
|
||||
|
||||
btScalar maxCross2 = 0.f;
|
||||
int chosenEdge = -1;
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
int src = edge->getSourceVertex();
|
||||
tmpFaces[i].m_indices.push_back(src);
|
||||
int targ = edge->getTargetVertex();
|
||||
btVector3 wa = convexUtil->vertices[src];
|
||||
|
||||
btVector3 wb = convexUtil->vertices[targ];
|
||||
btVector3 newEdge = wb-wa;
|
||||
newEdge.normalize();
|
||||
if (numEdges<2)
|
||||
edges[numEdges++] = newEdge;
|
||||
|
||||
edge = edge->getNextEdgeOfFace();
|
||||
} while (edge!=firstEdge);
|
||||
|
||||
btScalar planeEq = 1e30f;
|
||||
|
||||
|
||||
if (numEdges==2)
|
||||
{
|
||||
faceNormals[i] = edges[0].cross(edges[1]);
|
||||
faceNormals[i].normalize();
|
||||
tmpFaces[i].m_plane[0] = faceNormals[i].getX();
|
||||
tmpFaces[i].m_plane[1] = faceNormals[i].getY();
|
||||
tmpFaces[i].m_plane[2] = faceNormals[i].getZ();
|
||||
tmpFaces[i].m_plane[3] = planeEq;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
btAssert(0);//degenerate?
|
||||
faceNormals[i].setZero();
|
||||
}
|
||||
|
||||
for (int v=0;v<tmpFaces[i].m_indices.size();v++)
|
||||
{
|
||||
btScalar eq = m_vertices[tmpFaces[i].m_indices[v]].dot(faceNormals[i]);
|
||||
if (planeEq>eq)
|
||||
{
|
||||
planeEq=eq;
|
||||
}
|
||||
}
|
||||
tmpFaces[i].m_plane[3] = -planeEq;
|
||||
}
|
||||
|
||||
//merge coplanar faces
|
||||
|
||||
btScalar faceWeldThreshold= 0.999f;
|
||||
btAlignedObjectArray<int> todoFaces;
|
||||
for (int i=0;i<tmpFaces.size();i++)
|
||||
todoFaces.push_back(i);
|
||||
|
||||
while (todoFaces.size())
|
||||
{
|
||||
btAlignedObjectArray<int> coplanarFaceGroup;
|
||||
int refFace = todoFaces[todoFaces.size()-1];
|
||||
|
||||
coplanarFaceGroup.push_back(refFace);
|
||||
btFace& faceA = tmpFaces[refFace];
|
||||
todoFaces.pop_back();
|
||||
|
||||
btVector3 faceNormalA(faceA.m_plane[0],faceA.m_plane[1],faceA.m_plane[2]);
|
||||
for (int j=todoFaces.size()-1;j>=0;j--)
|
||||
{
|
||||
int i = todoFaces[j];
|
||||
btFace& faceB = tmpFaces[i];
|
||||
btVector3 faceNormalB(faceB.m_plane[0],faceB.m_plane[1],faceB.m_plane[2]);
|
||||
if (faceNormalA.dot(faceNormalB)>faceWeldThreshold)
|
||||
{
|
||||
coplanarFaceGroup.push_back(i);
|
||||
todoFaces.remove(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool did_merge = false;
|
||||
if (mergeCoplanarTriangles && coplanarFaceGroup.size()>1)
|
||||
{
|
||||
//do the merge: use Graham Scan 2d convex hull
|
||||
|
||||
btAlignedObjectArray<GrahamVector2> orgpoints;
|
||||
|
||||
for (int i=0;i<coplanarFaceGroup.size();i++)
|
||||
{
|
||||
|
||||
btFace& face = tmpFaces[coplanarFaceGroup[i]];
|
||||
btVector3 faceNormal(face.m_plane[0],face.m_plane[1],face.m_plane[2]);
|
||||
btVector3 xyPlaneNormal(0,0,1);
|
||||
|
||||
btQuaternion rotationArc = shortestArcQuat(faceNormal,xyPlaneNormal);
|
||||
|
||||
for (int f=0;f<face.m_indices.size();f++)
|
||||
{
|
||||
int orgIndex = face.m_indices[f];
|
||||
btVector3 pt = m_vertices[orgIndex];
|
||||
btVector3 rotatedPt = quatRotate(rotationArc,pt);
|
||||
rotatedPt.setZ(0);
|
||||
bool found = false;
|
||||
|
||||
for (int i=0;i<orgpoints.size();i++)
|
||||
{
|
||||
//if ((orgpoints[i].m_orgIndex == orgIndex) || ((rotatedPt-orgpoints[i]).length2()<0.0001))
|
||||
if (orgpoints[i].m_orgIndex == orgIndex)
|
||||
{
|
||||
found=true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
orgpoints.push_back(GrahamVector2(rotatedPt,orgIndex));
|
||||
}
|
||||
}
|
||||
|
||||
btFace combinedFace;
|
||||
for (int i=0;i<4;i++)
|
||||
combinedFace.m_plane[i] = tmpFaces[coplanarFaceGroup[0]].m_plane[i];
|
||||
|
||||
btAlignedObjectArray<GrahamVector2> hull;
|
||||
GrahamScanConvexHull2D(orgpoints,hull);
|
||||
|
||||
for (int i=0;i<hull.size();i++)
|
||||
{
|
||||
combinedFace.m_indices.push_back(hull[i].m_orgIndex);
|
||||
for(int k = 0; k < orgpoints.size(); k++) {
|
||||
if(orgpoints[k].m_orgIndex == hull[i].m_orgIndex) {
|
||||
orgpoints[k].m_orgIndex = -1; // invalidate...
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// are there rejected vertices?
|
||||
bool reject_merge = false;
|
||||
for(int i = 0; i < orgpoints.size(); i++) {
|
||||
if(orgpoints[i].m_orgIndex == -1)
|
||||
continue; // this is in the hull...
|
||||
// this vertex is rejected -- is anybody else using this vertex?
|
||||
for(int j = 0; j < tmpFaces.size(); j++) {
|
||||
btFace& face = tmpFaces[j];
|
||||
// is this a face of the current coplanar group?
|
||||
bool is_in_current_group = false;
|
||||
for(int k = 0; k < coplanarFaceGroup.size(); k++) {
|
||||
if(coplanarFaceGroup[k] == j) {
|
||||
is_in_current_group = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(is_in_current_group) // ignore this face...
|
||||
continue;
|
||||
// does this face use this rejected vertex?
|
||||
for(int v = 0; v < face.m_indices.size(); v++) {
|
||||
if(face.m_indices[v] == orgpoints[i].m_orgIndex) {
|
||||
// this rejected vertex is used in another face -- reject merge
|
||||
reject_merge = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(reject_merge)
|
||||
break;
|
||||
}
|
||||
if(reject_merge)
|
||||
break;
|
||||
}
|
||||
if(!reject_merge) {
|
||||
// do this merge!
|
||||
did_merge = true;
|
||||
m_faces.push_back(combinedFace);
|
||||
}
|
||||
}
|
||||
if(!did_merge)
|
||||
{
|
||||
for (int i=0;i<coplanarFaceGroup.size();i++)
|
||||
{
|
||||
m_faces.push_back(tmpFaces[coplanarFaceGroup[i]]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#ifndef _BT_CONVEX_UTILITY_H
|
||||
#define _BT_CONVEX_UTILITY_H
|
||||
|
||||
#include "LinearMath/btAlignedObjectArray.h"
|
||||
#include "LinearMath/btVector3.h"
|
||||
|
||||
struct btFace
|
||||
{
|
||||
btAlignedObjectArray<int> m_indices;
|
||||
// btAlignedObjectArray<int> m_connectedFaces;
|
||||
btScalar m_plane[4];
|
||||
};
|
||||
|
||||
class btConvexUtility
|
||||
{
|
||||
public:
|
||||
|
||||
btAlignedObjectArray<btVector3> m_vertices;
|
||||
btAlignedObjectArray<btFace> m_faces;
|
||||
|
||||
bool initializePolyhedralFeatures(const btAlignedObjectArray<btVector3>& orgVertices, bool mergeCoplanarTriangles);
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,730 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#include "btGpuNarrowphaseAndSolver.h"
|
||||
|
||||
//#include "CustomConvexShape.h"
|
||||
//#include "CustomConvexPairCollision.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
|
||||
|
||||
//#include "BulletDynamics/Dynamics/btRigidBody.h"
|
||||
|
||||
#include "Adl/Adl.h"
|
||||
#include "../../dynamics/basic_demo/Stubs/AdlMath.h"
|
||||
#include "../../dynamics/basic_demo/Stubs/AdlContact4.h"
|
||||
#include "../../dynamics/basic_demo/Stubs/AdlQuaternion.h"
|
||||
#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h"
|
||||
#include "../../dynamics/basic_demo/Stubs/Solver.h"
|
||||
#include <AdlPrimitives/Sort/RadixSort32.h>
|
||||
|
||||
// Non-zero: contact batching runs through adl::Solver<TYPE_CL>::batchContacts;
// zero: the host batchContacts2 path is used instead (see computeContactsAndSolver).
int gpuBatchContacts = 1;

// Pair count returned by the most recent ChNarrowphase culling pass.
int numPairsOut = 0;
|
||||
struct CPUSolveData
|
||||
{
|
||||
u32 m_n[adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT];
|
||||
u32 m_offset[adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT];
|
||||
};
|
||||
|
||||
|
||||
struct ParallelSolveData
|
||||
{
|
||||
adl::Buffer<u32>* m_numConstraints;
|
||||
adl::Buffer<u32>* m_offsets;
|
||||
};
|
||||
|
||||
struct CustomDispatchData
|
||||
{
|
||||
adl::DeviceCL* m_deviceCL;
|
||||
adl::Device* m_deviceHost;
|
||||
ShapeDataType m_ShapeBuffer;
|
||||
adl::HostBuffer<ConvexHeightField*>* m_shapePointers;
|
||||
|
||||
adl::HostBuffer<int2>* m_pBufPairsCPU;
|
||||
|
||||
adl::Buffer<int2>* m_convexPairsOutGPU;
|
||||
adl::Buffer<int2>* m_planePairs;
|
||||
|
||||
adl::Buffer<Contact4>* m_pBufContactOutGPU;
|
||||
adl::HostBuffer<Contact4>* m_pBufContactOutCPU;
|
||||
adl::ChNarrowphase<adl::TYPE_CL>::Data* m_Data;
|
||||
|
||||
|
||||
|
||||
adl::HostBuffer<RigidBodyBase::Body>* m_bodyBufferCPU;
|
||||
adl::Buffer<RigidBodyBase::Body>* m_bodyBufferGPU;
|
||||
|
||||
adl::Buffer<RigidBodyBase::Inertia>* m_inertiaBufferCPU;
|
||||
adl::Buffer<RigidBodyBase::Inertia>* m_inertiaBufferGPU;
|
||||
|
||||
adl::Solver<adl::TYPE_CL>::Data* m_solverDataGPU;
|
||||
SolverData m_contactCGPU;
|
||||
void* m_frictionCGPU;
|
||||
|
||||
int m_numAcceleratedShapes;
|
||||
int m_numAcceleratedRigidBodies;
|
||||
};
|
||||
|
||||
|
||||
// Creates the narrowphase/solver state for the given CL device.
// When deviceCL is null nothing is allocated and m_internalData stays null.
btGpuNarrowphaseAndSolver::btGpuNarrowphaseAndSolver(adl::DeviceCL* deviceCL)
	: m_internalData(0), m_planeBodyIndex(-1)
{
	if (!deviceCL)
		return;

	CustomDispatchData* data = new CustomDispatchData();
	memset(data, 0, sizeof(CustomDispatchData));
	m_internalData = data;

	adl::DeviceUtils::Config cfg;
	data->m_deviceCL = deviceCL;

	// Host device plus the host-side pair list.
	data->m_deviceHost = adl::DeviceUtils::allocate(adl::TYPE_HOST, cfg);
	data->m_pBufPairsCPU = new adl::HostBuffer<int2>(data->m_deviceHost, MAX_BROADPHASE_COLLISION_CL);

	// Device-side pair lists.
	data->m_convexPairsOutGPU = new adl::Buffer<int2>(data->m_deviceCL, MAX_BROADPHASE_COLLISION_CL);
	data->m_planePairs = new adl::Buffer<int2>(data->m_deviceCL, MAX_BROADPHASE_COLLISION_CL);

	// Contact, body and inertia storage.
	data->m_pBufContactOutCPU = new adl::HostBuffer<Contact4>(data->m_deviceHost, MAX_BROADPHASE_COLLISION_CL);
	data->m_bodyBufferCPU = new adl::HostBuffer<RigidBodyBase::Body>(data->m_deviceHost, MAX_CONVEX_BODIES_CL);

	data->m_inertiaBufferCPU = new adl::Buffer<RigidBodyBase::Inertia>(data->m_deviceHost, MAX_CONVEX_BODIES_CL);
	data->m_pBufContactOutGPU = new adl::Buffer<Contact4>(data->m_deviceCL, MAX_BROADPHASE_COLLISION_CL);
	data->m_inertiaBufferGPU = new adl::Buffer<RigidBodyBase::Inertia>(data->m_deviceCL, MAX_CONVEX_BODIES_CL);

	// Solver and narrowphase state.
	data->m_solverDataGPU = adl::Solver<adl::TYPE_CL>::allocate(data->m_deviceCL, MAX_BROADPHASE_COLLISION_CL);
	data->m_bodyBufferGPU = new adl::Buffer<RigidBodyBase::Body>(data->m_deviceCL, MAX_CONVEX_BODIES_CL);
	data->m_Data = adl::ChNarrowphase<adl::TYPE_CL>::allocate(data->m_deviceCL);
//	data->m_DataCPU = adl::ChNarrowphase<adl::TYPE_HOST>::allocate(data->m_deviceHost);

	// Shape storage.
	data->m_ShapeBuffer = adl::ChNarrowphase<adl::TYPE_CL>::allocateShapeBuffer(data->m_deviceCL, MAX_CONVEX_SHAPES_CL);
	data->m_shapePointers = new adl::HostBuffer<ConvexHeightField*>(data->m_deviceHost, MAX_CONVEX_SHAPES_CL);

	data->m_numAcceleratedShapes = 0;
	data->m_numAcceleratedRigidBodies = 0;

	// Constraint storage.
	data->m_contactCGPU = adl::Solver<adl::TYPE_CL>::allocateConstraint4(data->m_deviceCL, MAX_BROADPHASE_COLLISION_CL);
	data->m_frictionCGPU = adl::Solver<adl::TYPE_CL>::allocateFrictionConstraint(data->m_deviceCL, MAX_BROADPHASE_COLLISION_CL);
}
|
||||
|
||||
// Registers a convex shape with the narrowphase and returns its shape index
// (indices are handed out sequentially starting at 0).
// convexShape: the pointer is stored, not copied, so it must outlive this object.
int btGpuNarrowphaseAndSolver::registerShape(ConvexHeightField* convexShape)
{
	// Both the shape-pointer table and the device shape buffer were allocated
	// with MAX_CONVEX_SHAPES_CL entries; guard the capacity the same way
	// registerRigidBody() guards the body capacity.
	assert(m_internalData->m_numAcceleratedShapes < MAX_CONVEX_SHAPES_CL);

	const int shapeIndex = m_internalData->m_numAcceleratedShapes;

	(*m_internalData->m_shapePointers)[shapeIndex] = convexShape;
	// 0.01f is the same value computeContactsAndSolver() uses as its collision margin.
	adl::ChNarrowphase<adl::TYPE_CL>::setShape(m_internalData->m_ShapeBuffer, convexShape, shapeIndex, 0.01f);

	m_internalData->m_numAcceleratedShapes = shapeIndex + 1;
	return shapeIndex;
}
|
||||
|
||||
// Returns the device rigid body buffer as a raw cl_mem handle.
cl_mem	btGpuNarrowphaseAndSolver::getBodiesGpu()
{
	adl::Buffer<RigidBodyBase::Body>* bodies = m_internalData->m_bodyBufferGPU;
	return (cl_mem)bodies->m_ptr;
}
|
||||
|
||||
// Returns the device inertia buffer as a raw cl_mem handle.
cl_mem	btGpuNarrowphaseAndSolver::getBodyInertiasGpu()
{
	adl::Buffer<RigidBodyBase::Inertia>* inertias = m_internalData->m_inertiaBufferGPU;
	return (cl_mem)inertias->m_ptr;
}
|
||||
|
||||
|
||||
int btGpuNarrowphaseAndSolver::registerRigidBody(int shapeIndex, float mass, const float* position, const float* orientation , bool writeToGpu)
|
||||
{
|
||||
assert(m_internalData->m_numAcceleratedRigidBodies< (MAX_CONVEX_BODIES_CL-1));
|
||||
|
||||
RigidBodyBase::Body& body = m_internalData->m_bodyBufferCPU->m_ptr[m_internalData->m_numAcceleratedRigidBodies];
|
||||
|
||||
float friction = 1.f;
|
||||
float restitution = 0.f;
|
||||
|
||||
body.m_frictionCoeff = friction;
|
||||
body.m_restituitionCoeff = restitution;
|
||||
body.m_angVel = make_float4(0.f);
|
||||
body.m_linVel = make_float4(0.f);
|
||||
body.m_pos = make_float4(position[0],position[1],position[2],0.f);
|
||||
body.m_quat = make_float4(orientation[0],orientation[1],orientation[2],orientation[3]);
|
||||
body.m_shapeIdx = shapeIndex;
|
||||
if (shapeIndex<0)
|
||||
{
|
||||
body.m_shapeType = CollisionShape::SHAPE_PLANE;
|
||||
m_planeBodyIndex = m_internalData->m_numAcceleratedRigidBodies;
|
||||
} else
|
||||
{
|
||||
body.m_shapeType = CollisionShape::SHAPE_CONVEX_HEIGHT_FIELD;
|
||||
}
|
||||
|
||||
body.m_invMass = mass? 1.f/mass : 0.f;
|
||||
|
||||
if (writeToGpu)
|
||||
m_internalData->m_bodyBufferGPU->write(&body,1,m_internalData->m_numAcceleratedRigidBodies);
|
||||
|
||||
RigidBodyBase::Inertia& shapeInfo = m_internalData->m_inertiaBufferCPU->m_ptr[m_internalData->m_numAcceleratedRigidBodies];
|
||||
|
||||
if (mass==0.f)
|
||||
{
|
||||
shapeInfo.m_initInvInertia = mtZero();
|
||||
shapeInfo.m_invInertia = mtZero();
|
||||
} else
|
||||
{
|
||||
|
||||
assert(body.m_shapeIdx>=0);
|
||||
|
||||
//approximate using the aabb of the shape
|
||||
|
||||
Aabb aabb = (*m_internalData->m_shapePointers)[shapeIndex]->m_aabb;
|
||||
float4 halfExtents = (aabb.m_max - aabb.m_min);
|
||||
|
||||
float4 localInertia;
|
||||
|
||||
float lx=2.f*halfExtents.x;
|
||||
float ly=2.f*halfExtents.y;
|
||||
float lz=2.f*halfExtents.z;
|
||||
|
||||
localInertia = make_float4( (mass/12.0f) * (ly*ly + lz*lz),
|
||||
(mass/12.0f) * (lx*lx + lz*lz),
|
||||
(mass/12.0f) * (lx*lx + ly*ly));
|
||||
|
||||
float4 invLocalInertia;
|
||||
invLocalInertia.x = 1.f/localInertia.x;
|
||||
invLocalInertia.y = 1.f/localInertia.y;
|
||||
invLocalInertia.z = 1.f/localInertia.z;
|
||||
invLocalInertia.w = 0.f;
|
||||
|
||||
shapeInfo.m_initInvInertia = mtZero();
|
||||
shapeInfo.m_initInvInertia.m_row[0].x = invLocalInertia.x;
|
||||
shapeInfo.m_initInvInertia.m_row[1].y = invLocalInertia.y;
|
||||
shapeInfo.m_initInvInertia.m_row[2].z = invLocalInertia.z;
|
||||
|
||||
Matrix3x3 m = qtGetRotationMatrix( body.m_quat);
|
||||
Matrix3x3 mT = mtTranspose( m );
|
||||
shapeInfo.m_invInertia = mtMul( mtMul( m, shapeInfo.m_initInvInertia ), mT );
|
||||
|
||||
}
|
||||
|
||||
if (writeToGpu)
|
||||
m_internalData->m_inertiaBufferGPU->write(&shapeInfo,1,m_internalData->m_numAcceleratedRigidBodies);
|
||||
return m_internalData->m_numAcceleratedRigidBodies++;
|
||||
}
|
||||
|
||||
void btGpuNarrowphaseAndSolver::writeAllBodiesToGpu()
|
||||
{
|
||||
m_internalData->m_bodyBufferGPU->write(m_internalData->m_bodyBufferCPU->m_ptr,m_internalData->m_numAcceleratedRigidBodies);
|
||||
m_internalData->m_inertiaBufferGPU->write( m_internalData->m_inertiaBufferCPU->m_ptr,m_internalData->m_numAcceleratedRigidBodies);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Releases everything the constructor allocated. The CL device itself is
// owned by the caller and is not released here.
btGpuNarrowphaseAndSolver::~btGpuNarrowphaseAndSolver(void)
{
	if (!m_internalData)
		return;

	CustomDispatchData* data = m_internalData;

	delete data->m_pBufPairsCPU;
	delete data->m_convexPairsOutGPU;
	delete data->m_planePairs;
	delete data->m_pBufContactOutGPU;
	delete data->m_inertiaBufferGPU;
	delete data->m_pBufContactOutCPU;
	delete data->m_shapePointers;
	adl::ChNarrowphase<adl::TYPE_CL>::deallocateShapeBuffer(data->m_ShapeBuffer);
	delete data->m_inertiaBufferCPU;
	adl::Solver<adl::TYPE_CL>::deallocateConstraint4( data->m_contactCGPU );
	adl::Solver<adl::TYPE_CL>::deallocateFrictionConstraint( data->m_frictionCGPU );

	delete data->m_bodyBufferGPU;
	adl::Solver<adl::TYPE_CL>::deallocate( data->m_solverDataGPU);
	delete data->m_bodyBufferCPU;
	adl::ChNarrowphase<adl::TYPE_CL>::deallocate(data->m_Data);

	// The host device goes last: the host buffers above were created on it.
	adl::DeviceUtils::deallocate(data->m_deviceHost);

	delete data;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Runs one narrowphase + contact solve step on the CL device.
//
// broadphasePairs    : device buffer of int2 body-index pairs from the broadphase.
// numBroadphasePairs : number of pairs in that buffer.
//
// Steps: (1) cull the broadphase pairs, (2) generate contacts for plane-vs-body
// and convex-vs-convex pairs, (3) sort contacts into solver cells, batch them
// and convert them to constraints, (4) run the contact solver.
// Returns early when no device data exists, no body is registered, or no
// contact was produced.
void btGpuNarrowphaseAndSolver::computeContactsAndSolver(cl_mem broadphasePairs, int numBroadphasePairs)
{
	BT_PROFILE("computeContactsAndSolver");

	// The constructor leaves m_internalData null when it was given no CL device.
	if (!m_internalData)
		return;

	int maxBodyIndex = m_internalData->m_numAcceleratedRigidBodies;

	if (!maxBodyIndex)
		return;
	int numOfConvexRBodies = maxBodyIndex;

	adl::ChNarrowphaseBase::Config cfgNP;
	cfgNP.m_collisionMargin = 0.01f;
	int nContactOut = 0;
	//printf("convexPairsOut.m_size = %d\n",m_internalData->m_convexPairsOutGPU->m_size);

	// Wrap the caller's cl_mem in an adl buffer so the narrowphase can consume it.
	adl::Buffer<int2> broadphasePairsGPU;
	broadphasePairsGPU.m_ptr = (int2*)broadphasePairs;
	broadphasePairsGPU.m_size = numBroadphasePairs;
	broadphasePairsGPU.m_device = m_internalData->m_deviceCL;

	bool useCulling = true;
	if (useCulling)
	{
		BT_PROFILE("ChNarrowphase::culling");
		adl::DeviceUtils::waitForCompletion(m_internalData->m_deviceCL);

		numPairsOut = adl::ChNarrowphase<adl::TYPE_CL>::culling(
			m_internalData->m_Data,
			&broadphasePairsGPU,
			numBroadphasePairs,
			m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer,
			m_internalData->m_convexPairsOutGPU,
			cfgNP);
	}

	{
		BT_PROFILE("ChNarrowphase::execute");
		if (useCulling)
		{
			if (m_planeBodyIndex>=0)
			{
				BT_PROFILE("ChNarrowphase:: plane versus convex");
				// Pair the plane body with every other registered body.
				//todo: get rid of this dynamic allocation
				int2* hostPairs = new int2[m_internalData->m_numAcceleratedRigidBodies-1];
				int index=0;
				for (int i=0;i<m_internalData->m_numAcceleratedRigidBodies;i++)
				{
					if (i!=m_planeBodyIndex)
					{
						hostPairs[index].x = m_planeBodyIndex;
						hostPairs[index].y = i;
						index++;
					}
				}
				assert(m_internalData->m_numAcceleratedRigidBodies-1 == index);
				m_internalData->m_planePairs->write(hostPairs,index);
				// Wait for the upload before the host copy is released.
				adl::DeviceUtils::waitForCompletion(m_internalData->m_deviceCL);
				delete[]hostPairs;
				//convex versus plane
				adl::ChNarrowphase<adl::TYPE_CL>::execute(m_internalData->m_Data, m_internalData->m_planePairs, index, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer,
					0,0,m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
			}

			//convex versus convex
			adl::ChNarrowphase<adl::TYPE_CL>::execute(m_internalData->m_Data, m_internalData->m_convexPairsOutGPU,numPairsOut, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
		} else
		{
			adl::ChNarrowphase<adl::TYPE_CL>::execute(m_internalData->m_Data, &broadphasePairsGPU, numBroadphasePairs, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
		}

		adl::DeviceUtils::waitForCompletion(m_internalData->m_deviceCL);
	}

	if (!nContactOut)
		return;

	bool useSolver = true;//true;//false;

	if (useSolver)
	{
		float dt=1./60.;
		adl::SolverBase::ConstraintCfg csCfg( dt );
		csCfg.m_enableParallelSolve = true;
		csCfg.m_averageExtent = 0.2f;//@TODO m_averageObjExtent;
		csCfg.m_staticIdx = m_planeBodyIndex;

		bool exposeInternalBatchImplementation=true;

		adl::Solver<adl::TYPE_HOST>::Data* cpuSolverData = 0;
		if (exposeInternalBatchImplementation)
		{
			BT_PROFILE("Batching");

			cpuSolverData = adl::Solver<adl::TYPE_HOST>::allocate( m_internalData->m_deviceHost, nContactOut);

			adl::Buffer<Contact4>* contactsIn = m_internalData->m_pBufContactOutGPU;
			const adl::Buffer<RigidBodyBase::Body>* bodyBuf = m_internalData->m_bodyBufferGPU;
			void* additionalData = m_internalData->m_frictionCGPU;
			const adl::Buffer<RigidBodyBase::Inertia>* shapeBuf = m_internalData->m_inertiaBufferGPU;
			SolverData contactCOut = m_internalData->m_contactCGPU;
			int nContacts = nContactOut;

			bool useCPU=false;

			if (useCPU)
			{
				BT_PROFILE("CPU batch");
				{
					BT_PROFILE("CPU sortContacts2");
					sortContacts2( cpuSolverData, bodyBuf, contactsIn, additionalData, nContacts, csCfg );
				}

				CPUSolveData* dataCPU = (CPUSolveData*)cpuSolverData->m_parallelSolveData;
				{
					BT_PROFILE("CPU batchContacts2");

					adl::Buffer<u32> n; n.setRawPtr( cpuSolverData->m_device, dataCPU->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
					adl::Buffer<u32> offsets; offsets.setRawPtr( cpuSolverData->m_device, dataCPU->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
					batchContacts2( cpuSolverData, contactsIn, nContacts, &n, &offsets, csCfg.m_staticIdx );
				}

				{
					BT_PROFILE("CPU convertToConstraints2");
					convertToConstraints2( cpuSolverData, bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, csCfg );
				}

				{
					BT_PROFILE("CPU -> GPU copy");
					ParallelSolveData* dataGPU = (ParallelSolveData*)m_internalData->m_solverDataGPU->m_parallelSolveData;
					dataGPU->m_numConstraints->write(dataCPU->m_n,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
					dataGPU->m_offsets->write(dataCPU->m_offset,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
					adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL);
				}
			}
			else
			{
				BT_PROFILE("GPU batch");

				adl::Solver<adl::TYPE_CL>::Data* data = m_internalData->m_solverDataGPU;

				{
					// Make sure the scratch contact buffer can hold every contact.
					if( data->m_contactBuffer )
					{
						if( data->m_contactBuffer->getSize() < nContacts )
						{
							BT_PROFILE("delete data->m_contactBuffer;");
							delete data->m_contactBuffer;
							data->m_contactBuffer = 0;
						}
					}
					if( data->m_contactBuffer == 0 )
					{
						data->m_contactBuffer = new adl::Buffer<Contact4>( data->m_device, nContacts );
					}

					adl::Buffer<Contact4>* contactNative = contactsIn;

					ParallelSolveData* nativeSolveData = (ParallelSolveData*)data->m_parallelSolveData;

					{
						ADLASSERT( data->m_device->m_type == adl::TYPE_CL );
						adl::Buffer<RigidBodyBase::Body>* bodyNative = adl::BufferUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
						// Mapped view of the contacts, valid only inside this
						// scope (it is unmapped at the end of the scope).
						adl::Buffer<Contact4>* mappedContacts = adl::BufferUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );

						const int sortAlignment = 512; // todo. get this out of sort
						if( csCfg.m_enableParallelSolve )
						{
							int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment );

							adl::Buffer<u32>* countsNative = nativeSolveData->m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost );
							adl::Buffer<u32>* offsetsNative = nativeSolveData->m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost );

							{	//	2. set cell idx
								BT_PROFILE("GPU set cell idx");
								struct CB
								{
									int m_nContacts;
									int m_staticIdx;
									float m_scale;
									int m_nSplit;
								};

								ADLASSERT( sortSize%64 == 0 );
								CB cdata;
								cdata.m_nContacts = nContacts;
								cdata.m_staticIdx = csCfg.m_staticIdx;
								cdata.m_scale = 1.f/(adl::SolverBase::N_OBJ_PER_SPLIT*csCfg.m_averageExtent);
								cdata.m_nSplit = adl::SolverBase::N_SPLIT;

								adl::Buffer<CB> constBuffer( data->m_device, 1, adl::BufferBase::BUFFER_CONST );
								adl::Launcher::BufferInfo bInfo[] = { adl::Launcher::BufferInfo( mappedContacts ), adl::Launcher::BufferInfo( bodyNative ), adl::Launcher::BufferInfo( data->m_sortDataBuffer ) };
								adl::Launcher launcher( data->m_device, data->m_setSortDataKernel );
								launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(adl::Launcher::BufferInfo) );
								launcher.setConst( constBuffer, cdata );
								launcher.launch1D( sortSize, 64 );
							}

							bool gpuRadixSort=true;
							if (gpuRadixSort)
							{	//	3. sort by cell idx
								BT_PROFILE("gpuRadixSort");
								//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
								adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize );
							} else
							{
								BT_PROFILE("cpu RadixSort");
								adl::HostBuffer<adl::SortData> sortData(m_internalData->m_deviceHost,nContacts);
								data->m_sortDataBuffer->read(sortData.m_ptr,nContacts);
								adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL);

								adl::RadixSort<adl::TYPE_HOST>::Data* sData = adl::RadixSort<adl::TYPE_HOST>::allocate( m_internalData->m_deviceHost, nContacts );
								adl::RadixSort<adl::TYPE_HOST>::execute( sData, sortData, nContacts );
								adl::RadixSort<adl::TYPE_HOST>::deallocate( sData );

								data->m_sortDataBuffer->write(sortData.m_ptr,nContacts);
								adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL);
							}

							bool gpuBoundSearch=true;
							if (gpuBoundSearch)
							{	//	4. find entries
								BT_PROFILE("gpuBoundSearch");
								adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
									adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT, adl::BoundSearchBase::COUNT );

								adl::PrefixScan<adl::TYPE_CL>::execute( data->m_scan, *countsNative, *offsetsNative,
									adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
							} else
							{
								BT_PROFILE("cpuBoundSearch");
								adl::HostBuffer<adl::SortData> sortData(m_internalData->m_deviceHost,nContacts);
								data->m_sortDataBuffer->read(sortData.m_ptr,nContacts);
								adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL);

								adl::HostBuffer<u32> n0( m_internalData->m_deviceHost, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
								adl::HostBuffer<u32> offset0( m_internalData->m_deviceHost, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
								for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
								{
									n0[i] = 0;
									offset0[i] = 0;
								}

								// Count the contacts that fall into each cell.
								for(int i=0; i<nContacts; i++)
								{
									int idx = sortData[i].m_key;
									assert(idx>=0);
									assert(idx<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
									n0[idx]++;
								}

								//	scan: running sum of the counts gives each cell's offset
								int sum = 0;
								for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
								{
									offset0[i] = sum;
									sum += n0[i];
								}

								countsNative->write(n0.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
								offsetsNative->write(offset0.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
								adl::DeviceUtils::waitForCompletion( data->m_device );
							}

							{	//	5. sort constraints by cellIdx
								{
									BT_PROFILE("gpu m_reorderContactKernel");
									adl::Buffer<int4> constBuffer( data->m_device, 1, adl::BufferBase::BUFFER_CONST );

									int4 cdata; cdata.x = nContacts;
									adl::Launcher::BufferInfo bInfo[] = { adl::Launcher::BufferInfo( mappedContacts ), adl::Launcher::BufferInfo( data->m_contactBuffer ), adl::Launcher::BufferInfo( data->m_sortDataBuffer ) };
									adl::Launcher launcher( data->m_device, data->m_reorderContactKernel );
									launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(adl::Launcher::BufferInfo) );
									launcher.setConst( constBuffer, cdata );
									launcher.launch1D( nContacts, 64 );
								}
							}
						}

						adl::BufferUtils::unmap<false>( bodyNative, bodyBuf );
						adl::BufferUtils::unmap<false>( mappedContacts, contactsIn );
					}

					adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL);

					{
						// Copy the reordered contacts from the scratch buffer back
						// into the contact buffer.
						BT_PROFILE("gpu m_copyConstraintKernel");
						adl::Buffer<int4> constBuffer( data->m_device, 1, adl::BufferBase::BUFFER_CONST );
						int4 cdata; cdata.x = nContacts;
						adl::Launcher::BufferInfo bInfo[] = { adl::Launcher::BufferInfo( data->m_contactBuffer ), adl::Launcher::BufferInfo( contactNative ) };
						adl::Launcher launcher( data->m_device, data->m_copyConstraintKernel );
						launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(adl::Launcher::BufferInfo) );
						launcher.setConst( constBuffer, cdata );
						launcher.launch1D( nContacts, 64 );
						adl::DeviceUtils::waitForCompletion( data->m_device );
					}

					// Debug toggle: when set, the GPU batching result is compared
					// against the CPU batching result in the CPU path below.
					bool compareGPU = false;
					if (gpuBatchContacts)
					{
						BT_PROFILE("gpu batchContacts");
						adl::Solver<adl::TYPE_CL>::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, csCfg.m_staticIdx );
					}
					else
					{
						BT_PROFILE("cpu batchContacts2");
						// NOTE(review): this overwrites the pointer set up by
						// Solver<TYPE_HOST>::allocate -- confirm it does not leak.
						cpuSolverData->m_parallelSolveData = 0;//
						ParallelSolveData* dataGPU = (ParallelSolveData*)m_internalData->m_solverDataGPU->m_parallelSolveData;
						adl::Buffer<u32> numConstraints(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
						adl::Buffer<u32> offsets(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);

						{
							BT_PROFILE("gpu->cpu read m_numConstraints");
							dataGPU->m_numConstraints->read(numConstraints.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
							dataGPU->m_offsets->read(offsets.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
							adl::DeviceUtils::waitForCompletion( data->m_device );
						}

						adl::Buffer<u32> gpunumConstraints(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
						adl::Buffer<u32> gpuoffsets(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);

						if (compareGPU)
						{
							// Run the GPU batching on copies so the real buffers
							// stay untouched for the CPU pass.
							adl::Buffer<Contact4> contactNativeCopy (data->m_device,contactNative->getSize());
							contactNativeCopy.write(*contactNative,contactNative->getSize());
							adl::DeviceUtils::waitForCompletion( data->m_device );

							adl::Buffer<u32> tmpNumGPU(data->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
							adl::Buffer<u32> tmpOffsetGPU(data->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
							tmpNumGPU.write(numConstraints.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
							tmpOffsetGPU.write(offsets.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
							adl::DeviceUtils::waitForCompletion( data->m_device );

							BT_PROFILE("gpu batchContacts");
							//adl::Solver<adl::TYPE_CL>::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, csCfg.m_staticIdx );
							adl::Solver<adl::TYPE_CL>::batchContacts( data, &contactNativeCopy, nContacts, &tmpNumGPU, &tmpOffsetGPU, csCfg.m_staticIdx );

							adl::DeviceUtils::waitForCompletion( data->m_device );

							//compare now
							tmpNumGPU.read(gpunumConstraints,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
							tmpOffsetGPU.read(gpuoffsets,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
							adl::DeviceUtils::waitForCompletion( data->m_device );
						}

						{
							BT_PROFILE("cpu batchContacts2");
							batchContacts2( cpuSolverData, contactNative, nContacts, &numConstraints, &offsets, csCfg.m_staticIdx );
						}

						if (compareGPU)
						{
							adl::DeviceUtils::waitForCompletion( data->m_device );
							dataGPU->m_numConstraints->write(numConstraints.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
							dataGPU->m_offsets->write(offsets.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
							adl::DeviceUtils::waitForCompletion( data->m_device );

							for (int i=0;i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT;i++)
							{
								if (gpunumConstraints.m_ptr[i] != numConstraints.m_ptr[i])
								{
									printf("numConstraints error at %d, expected %d got %d\n",i,numConstraints.m_ptr[i],gpunumConstraints.m_ptr[i]);
								}

								if (gpuoffsets.m_ptr[i] != offsets.m_ptr[i])
								{
									printf("offsets error at %d, expected %d got %d\n",i,offsets.m_ptr[i],gpuoffsets.m_ptr[i]);
								}
							}
						}
					}

					{
						BT_PROFILE("gpu convertToConstraints");
						adl::Solver<adl::TYPE_CL>::convertToConstraints( data, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, csCfg );
						adl::DeviceUtils::waitForCompletion( data->m_device );
					}

					if (compareGPU)
					{
						adl::Buffer<Contact4> contactNativeCPU(cpuSolverData->m_device,contactNative->getSize());
						contactNative->read(contactNativeCPU,nContacts);
						adl::DeviceUtils::waitForCompletion( data->m_device );
						for (int i=0;i<nContacts;i++)
						{
							//if (contactNativeCopyCPU.m_ptr[i].m_frictionCoeffCmp !=45874)// contactNativeCPU.m_ptr[i].m_batchIdx != contactNativeCopyCPU.m_ptr[i].m_batchIdx)
							{
								//if (.m_friction!=45874
								//printf("not matching at %d, expected %d, got %d\n",i,contactNativeCPU.m_ptr[i].m_batchIdx,contactNativeCopyCPU.m_ptr[i].m_batchIdx);
							}
						}
					}
				}
			}
		} else
		{
			BT_PROFILE("GPU reorderConvertToConstraints");
			adl::Solver<adl::TYPE_CL>::reorderConvertToConstraints(
				m_internalData->m_solverDataGPU,
				m_internalData->m_bodyBufferGPU,
				m_internalData->m_inertiaBufferGPU,
				m_internalData->m_pBufContactOutGPU,
				m_internalData->m_contactCGPU,
				m_internalData->m_frictionCGPU,
				nContactOut,
				csCfg );
			adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL );
		}

		{
			BT_PROFILE("GPU solveContactConstraint");
			m_internalData->m_solverDataGPU->m_nIterations = 5;

			adl::Solver<adl::TYPE_CL>::solveContactConstraint( m_internalData->m_solverDataGPU,
				m_internalData->m_bodyBufferGPU,
				m_internalData->m_inertiaBufferGPU,
				m_internalData->m_contactCGPU,
				0,
				nContactOut );

			adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL );
		}

		if (cpuSolverData)
			adl::Solver<adl::TYPE_HOST>::deallocate( cpuSolverData );

		// Disabled: the solved bodies stay on the device.
		if (0)
		{
			BT_PROFILE("read body velocities back to CPU");
			//read body updated linear/angular velocities back to CPU
			m_internalData->m_bodyBufferGPU->read(
				m_internalData->m_bodyBufferCPU->m_ptr,numOfConvexRBodies);
			adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL );
		}
	}
}
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#ifndef GPU_NARROWPHASE_SOLVER_H
|
||||
#define GPU_NARROWPHASE_SOLVER_H
|
||||
|
||||
|
||||
|
||||
//#define MAX_CONVEX_BODIES_CL 8*1024
|
||||
// Capacity limits shared by the GPU narrowphase/solver and the demos.
// Every expression-valued macro is parenthesized so that it expands safely
// inside larger expressions (e.g. x / MAX_CONVEX_BODIES_CL).
#define MAX_CONVEX_BODIES_CL (128*1024)
#define MAX_PAIRS_PER_BODY_CL 16
#define MAX_CONVEX_SHAPES_CL 8192
#define MAX_BROADPHASE_COLLISION_CL (MAX_CONVEX_BODIES_CL*MAX_PAIRS_PER_BODY_CL)
|
||||
|
||||
/*
|
||||
#define MAX_CONVEX_BODIES_CL 1024
|
||||
#define MAX_PAIRS_PER_BODY_CL 32
|
||||
#define MAX_CONVEX_SHAPES_CL 8192
|
||||
#define MAX_BROADPHASE_COLLISION_CL (MAX_CONVEX_BODIES_CL*MAX_PAIRS_PER_BODY_CL)
|
||||
*/
|
||||
|
||||
namespace adl
|
||||
{
|
||||
struct DeviceCL;
|
||||
};
|
||||
|
||||
|
||||
struct CustomDispatchData;
|
||||
|
||||
#include "../basic_initialize/btOpenCLInclude.h"
|
||||
|
||||
|
||||
class btGpuNarrowphaseAndSolver
|
||||
{
|
||||
protected:
|
||||
|
||||
CustomDispatchData* m_internalData;
|
||||
int m_acceleratedCompanionShapeIndex;
|
||||
int m_planeBodyIndex;
|
||||
|
||||
public:
|
||||
btGpuNarrowphaseAndSolver(adl::DeviceCL* deviceCL);
|
||||
|
||||
virtual ~btGpuNarrowphaseAndSolver(void);
|
||||
|
||||
int registerShape(class ConvexHeightField* convexShape);
|
||||
int registerRigidBody(int shapeIndex, float mass, const float* position, const float* orientation, bool writeToGpu = true);
|
||||
void writeAllBodiesToGpu();
|
||||
|
||||
//btBroadphasePair* GetPair(btBroadphasePairArray& pairArray, int idxBodyA, int idxBodyB);
|
||||
|
||||
virtual void computeContactsAndSolver(cl_mem broadphasePairs, int numBroadphasePairs);
|
||||
|
||||
cl_mem getBodiesGpu();
|
||||
|
||||
cl_mem getBodyInertiasGpu();
|
||||
|
||||
};
|
||||
|
||||
#endif //GPU_NARROWPHASE_SOLVER_H
|
||||
1775
Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/main.cpp
Normal file
1775
Extras/RigidBodyGpuPipeline/opencl/gpu_rigidbody_pipeline/main.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,5 @@
|
||||
|
||||
include "AMD"
|
||||
-- include "Intel"
|
||||
include "NVIDIA"
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
|
||||
-- premake4 project for the GPU rigid body pipeline demo (AMD OpenCL SDK).
-- The project is only generated when the AMD OpenCL SDK is found.
hasCL = findOpenCL_AMD()

if hasCL then

	project "OpenCL_gpu_rigidbody_pipeline2_AMD"

	initOpenCL_AMD()

	language "C++"
	kind "ConsoleApp"
	targetdir "../../../bin"

	initOpenGL()
	initGlew()

	includedirs {
		"../../primitives",
		"../../../../../src"
	}

	files {
		-- demo application and renderers
		"../main.cpp",
		"../CLPhysicsDemo.cpp",
		"../CLPhysicsDemo.h",
		"../GLInstancingRenderer.cpp",
		"../GLInstancingRenderer.h",
		"../GlutRenderer.cpp",
		"../GlutRenderer.h",
		"../Win32OpenGLRenderManager.cpp",
		"../Win32OpenGLRenderManager.h",
		-- GPU narrowphase / solver and convex shape support
		"../../gpu_rigidbody_pipeline/btConvexUtility.cpp",
		"../../gpu_rigidbody_pipeline/btConvexUtility.h",
		"../../gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.cpp",
		"../../gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h",
		"../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp",
		"../../../dynamics/basic_demo/ConvexHeightFieldShape.h",
		"../../../../../src/LinearMath/btConvexHullComputer.cpp",
		"../../../../../src/LinearMath/btConvexHullComputer.h",
		-- GPU broadphase
		"../../broadphase_benchmark/findPairsOpenCL.cpp",
		"../../broadphase_benchmark/findPairsOpenCL.h",
		"../../broadphase_benchmark/btGridBroadphaseCL.cpp",
		"../../broadphase_benchmark/btGridBroadphaseCL.h",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		-- Bullet sources compiled directly into the demo
		"../../../../../src/LinearMath/btAlignedAllocator.cpp",
		"../../../../../src/LinearMath/btQuickprof.cpp",
		"../../../../../src/LinearMath/btQuickprof.h",
		"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		-- OpenCL / OpenGL utilities
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,529 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#include "OpenGLInclude.h"
|
||||
|
||||
#include "CLPhysicsDemo.h"
|
||||
#include "LinearMath/btAlignedObjectArray.h"
|
||||
#include "DemoSettings.h"
|
||||
#include "../basic_initialize/btOpenCLUtils.h"
|
||||
#include "../opengl_interop/btOpenCLGLInteropBuffer.h"
|
||||
#include "../broadphase_benchmark/findPairsOpenCL.h"
|
||||
#include "LinearMath/btVector3.h"
|
||||
#include "LinearMath/btQuaternion.h"
|
||||
#include "LinearMath/btMatrix3x3.h"
|
||||
#include "../../opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h"
|
||||
#include "../../opencl/gpu_rigidbody_pipeline/btConvexUtility.h"
|
||||
#include "../../dynamics/basic_demo/ConvexHeightFieldShape.h"
|
||||
#include "../broadphase_benchmark/btGridBroadphaseCl.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
|
||||
|
||||
#define MSTRINGIFY(A) #A
|
||||
static char* interopKernelString =
|
||||
#include "../broadphase_benchmark/integrateKernel.cl"
|
||||
|
||||
#define INTEROPKERNEL_SRC_PATH "../../opencl/broadphase_benchmark/integrateKernel.cl"
|
||||
|
||||
cl_kernel g_integrateTransformsKernel;
|
||||
|
||||
|
||||
|
||||
bool runOpenCLKernels = true;
|
||||
|
||||
|
||||
btGpuNarrowphaseAndSolver* narrowphaseAndSolver = 0;
|
||||
ConvexHeightField* s_convexHeightField = 0 ;
|
||||
btOpenCLGLInteropBuffer* g_interopBuffer = 0;
|
||||
|
||||
extern GLuint cube_vbo;
|
||||
extern int VBOsize;
|
||||
|
||||
cl_mem clBuffer=0;
|
||||
char* hostPtr=0;
|
||||
cl_bool blocking= CL_TRUE;
|
||||
|
||||
|
||||
|
||||
btFindPairsIO gFpIO;
|
||||
|
||||
cl_context g_cxMainContext;
|
||||
cl_command_queue g_cqCommandQue;
|
||||
cl_device_id g_device;
|
||||
|
||||
cl_mem gLinVelMem=0;
|
||||
cl_mem gAngVelMem=0;
|
||||
cl_mem gBodyTimes=0;
|
||||
|
||||
#include <Adl/Adl.h>
|
||||
|
||||
adl::DeviceCL* g_deviceCL=0;
|
||||
|
||||
// Host-side mirror of one AABB corner as uploaded to the GPU.
// Keep this in sync with btAABBCL!
struct btAABBHost
{
	float fx, fy, fz;	// corner coordinates
	unsigned int uw;	// extra word; registerCollisionShape stores the shape index here
};
|
||||
|
||||
struct InternalData
|
||||
{
|
||||
adl::Buffer<btVector3>* m_linVelBuf;
|
||||
adl::Buffer<btVector3>* m_angVelBuf;
|
||||
adl::Buffer<float>* m_bodyTimes;
|
||||
bool m_useInterop;
|
||||
btGridBroadphaseCl* m_Broadphase;
|
||||
|
||||
adl::Buffer<btAABBHost>* m_localShapeAABB;
|
||||
|
||||
btVector3* m_linVelHost;
|
||||
btVector3* m_angVelHost;
|
||||
float* m_bodyTimesHost;
|
||||
|
||||
InternalData():m_linVelBuf(0),m_angVelBuf(0),m_bodyTimes(0),m_useInterop(0),m_Broadphase(0)
|
||||
{
|
||||
m_linVelHost= new btVector3[MAX_CONVEX_BODIES_CL];
|
||||
m_angVelHost = new btVector3[MAX_CONVEX_BODIES_CL];
|
||||
m_bodyTimesHost = new float[MAX_CONVEX_BODIES_CL];
|
||||
}
|
||||
~InternalData()
|
||||
{
|
||||
delete[] m_linVelHost;
|
||||
delete[] m_angVelHost;
|
||||
delete[] m_bodyTimesHost;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void InitCL(int preferredDeviceIndex, int preferredPlatformIndex, bool useInterop)
|
||||
{
|
||||
void* glCtx=0;
|
||||
void* glDC = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
glCtx = wglGetCurrentContext();
|
||||
#else //!_WIN32
|
||||
GLXContext glCtx = glXGetCurrentContext();
|
||||
#endif //!_WIN32
|
||||
glDC = wglGetCurrentDC();
|
||||
|
||||
int ciErrNum = 0;
|
||||
#ifdef CL_PLATFORM_INTEL
|
||||
cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
|
||||
#else
|
||||
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
if (useInterop)
|
||||
{
|
||||
g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
|
||||
} else
|
||||
{
|
||||
g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
|
||||
}
|
||||
|
||||
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);
|
||||
|
||||
if (numDev>0)
|
||||
{
|
||||
g_device= btOpenCLUtils::getDevice(g_cxMainContext,0);
|
||||
btOpenCLDeviceInfo clInfo;
|
||||
btOpenCLUtils::getDeviceInfo(g_device,clInfo);
|
||||
btOpenCLUtils::printDeviceInfo(g_device);
|
||||
g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Stores the renderer pointer (previously dropped, leaving m_renderer
// uninitialized), zeroes the shape/instance counters and allocates the
// private InternalData. OpenCL resources are created later by init().
CLPhysicsDemo::CLPhysicsDemo(Win32OpenGLWindow* renderer)
	:m_renderer(renderer),
	m_numCollisionShapes(0),
	m_numPhysicsInstances(0),
	m_data(0)
{
	m_data = new InternalData;
}
|
||||
|
||||
// The destructor releases nothing itself: m_data and the GPU helper objects
// are deleted in cleanup().
CLPhysicsDemo::~CLPhysicsDemo()
{
}
|
||||
|
||||
|
||||
void CLPhysicsDemo::writeBodiesToGpu()
|
||||
{
|
||||
if (narrowphaseAndSolver)
|
||||
narrowphaseAndSolver->writeAllBodiesToGpu();
|
||||
}
|
||||
|
||||
int CLPhysicsDemo::registerCollisionShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)
|
||||
{
|
||||
btAlignedObjectArray<btVector3> verts;
|
||||
|
||||
unsigned char* vts = (unsigned char*) vertices;
|
||||
for (int i=0;i<numVertices;i++)
|
||||
{
|
||||
float* vertex = (float*) &vts[i*strideInBytes];
|
||||
verts.push_back(btVector3(vertex[0]*scaling[0],vertex[1]*scaling[1],vertex[2]*scaling[2]));
|
||||
}
|
||||
|
||||
btConvexUtility util;
|
||||
bool merge = true;
|
||||
util.initializePolyhedralFeatures(verts,merge);
|
||||
|
||||
int numFaces= util.m_faces.size();
|
||||
float4* eqn = new float4[numFaces];
|
||||
for (int i=0;i<numFaces;i++)
|
||||
{
|
||||
eqn[i].x = util.m_faces[i].m_plane[0];
|
||||
eqn[i].y = util.m_faces[i].m_plane[1];
|
||||
eqn[i].z = util.m_faces[i].m_plane[2];
|
||||
eqn[i].w = util.m_faces[i].m_plane[3];
|
||||
}
|
||||
printf("numFaces = %d\n", numFaces);
|
||||
|
||||
|
||||
s_convexHeightField = new ConvexHeightField(eqn,numFaces);
|
||||
|
||||
int shapeIndex=-1;
|
||||
|
||||
if (narrowphaseAndSolver)
|
||||
shapeIndex = narrowphaseAndSolver->registerShape(s_convexHeightField);
|
||||
|
||||
if (shapeIndex>=0)
|
||||
{
|
||||
btAABBHost aabbMin, aabbMax;
|
||||
aabbMin.fx = s_convexHeightField->m_aabb.m_min.x;
|
||||
aabbMin.fy = s_convexHeightField->m_aabb.m_min.y;
|
||||
aabbMin.fz= s_convexHeightField->m_aabb.m_min.z;
|
||||
aabbMin.uw = shapeIndex;
|
||||
|
||||
aabbMax.fx = s_convexHeightField->m_aabb.m_max.x;
|
||||
aabbMax.fy = s_convexHeightField->m_aabb.m_max.y;
|
||||
aabbMax.fz= s_convexHeightField->m_aabb.m_max.z;
|
||||
aabbMax.uw = shapeIndex;
|
||||
|
||||
m_data->m_localShapeAABB->write(&aabbMin,1,shapeIndex*2);
|
||||
m_data->m_localShapeAABB->write(&aabbMax,1,shapeIndex*2+1);
|
||||
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
||||
}
|
||||
|
||||
m_numCollisionShapes++;
|
||||
delete[] eqn;
|
||||
return shapeIndex;
|
||||
}
|
||||
|
||||
int CLPhysicsDemo::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, void* userPointer)
|
||||
{
|
||||
btVector3 aabbMin(position[0],position[0],position[0]);
|
||||
btVector3 aabbMax = aabbMin;
|
||||
aabbMin -= btVector3(1.f,1.f,1.f);
|
||||
aabbMax += btVector3(1.f,1.f,1.f);
|
||||
|
||||
if (collisionShapeIndex>=0)
|
||||
{
|
||||
btBroadphaseProxy* proxy = m_data->m_Broadphase->createProxy(aabbMin,aabbMax,collisionShapeIndex,userPointer,1,1,0,0);//m_dispatcher);
|
||||
}
|
||||
|
||||
bool writeToGpu = false;
|
||||
int bodyIndex = -1;
|
||||
|
||||
if (narrowphaseAndSolver)
|
||||
bodyIndex = narrowphaseAndSolver->registerRigidBody(collisionShapeIndex,mass,position,orientation,writeToGpu);
|
||||
|
||||
m_numPhysicsInstances++;
|
||||
return bodyIndex;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void CLPhysicsDemo::init(int preferredDevice, int preferredPlatform, bool useInterop)
|
||||
{
|
||||
|
||||
InitCL(-1,-1,useInterop);
|
||||
|
||||
#define CUSTOM_CL_INITIALIZATION
|
||||
#ifdef CUSTOM_CL_INITIALIZATION
|
||||
g_deviceCL = new adl::DeviceCL();
|
||||
g_deviceCL->m_deviceIdx = g_device;
|
||||
g_deviceCL->m_context = g_cxMainContext;
|
||||
g_deviceCL->m_commandQueue = g_cqCommandQue;
|
||||
g_deviceCL->m_kernelManager = new adl::KernelManager;
|
||||
|
||||
#else
|
||||
DeviceUtils::Config cfg;
|
||||
cfg.m_type = DeviceUtils::Config::DEVICE_CPU;
|
||||
g_deviceCL = DeviceUtils::allocate( TYPE_CL, cfg );
|
||||
#endif
|
||||
|
||||
//adl::Solver<adl::TYPE_CL>::allocate(g_deviceCL->allocate(
|
||||
m_data->m_linVelBuf = new adl::Buffer<btVector3>(g_deviceCL,MAX_CONVEX_BODIES_CL);
|
||||
m_data->m_angVelBuf = new adl::Buffer<btVector3>(g_deviceCL,MAX_CONVEX_BODIES_CL);
|
||||
m_data->m_bodyTimes = new adl::Buffer<float>(g_deviceCL,MAX_CONVEX_BODIES_CL);
|
||||
|
||||
m_data->m_localShapeAABB = new adl::Buffer<btAABBHost>(g_deviceCL,MAX_CONVEX_SHAPES_CL);
|
||||
|
||||
gLinVelMem = (cl_mem)m_data->m_linVelBuf->m_ptr;
|
||||
gAngVelMem = (cl_mem)m_data->m_angVelBuf->m_ptr;
|
||||
gBodyTimes = (cl_mem)m_data->m_bodyTimes->m_ptr;
|
||||
|
||||
|
||||
|
||||
|
||||
narrowphaseAndSolver = new btGpuNarrowphaseAndSolver(g_deviceCL);
|
||||
|
||||
|
||||
|
||||
int maxObjects = btMax(256,MAX_CONVEX_BODIES_CL);
|
||||
int maxPairsSmallProxy = 32;
|
||||
btOverlappingPairCache* overlappingPairCache=0;
|
||||
|
||||
m_data->m_Broadphase = new btGridBroadphaseCl(overlappingPairCache,btVector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,
|
||||
g_cxMainContext ,g_device,g_cqCommandQue, g_deviceCL);
|
||||
|
||||
|
||||
|
||||
cl_program prog = btOpenCLUtils::compileCLProgramFromString(g_cxMainContext,g_device,interopKernelString,0,"",INTEROPKERNEL_SRC_PATH);
|
||||
g_integrateTransformsKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,interopKernelString, "integrateTransformsKernel" ,0,prog);
|
||||
|
||||
|
||||
initFindPairs(gFpIO, g_cxMainContext, g_device, g_cqCommandQue, MAX_CONVEX_BODIES_CL);
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
void CLPhysicsDemo::writeVelocitiesToGpu()
|
||||
{
|
||||
m_data->m_linVelBuf->write(m_data->m_linVelHost,MAX_CONVEX_BODIES_CL);
|
||||
m_data->m_angVelBuf->write(m_data->m_angVelHost,MAX_CONVEX_BODIES_CL);
|
||||
m_data->m_bodyTimes->write(m_data->m_bodyTimesHost,MAX_CONVEX_BODIES_CL);
|
||||
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
||||
}
|
||||
|
||||
|
||||
void CLPhysicsDemo::setupInterop()
|
||||
{
|
||||
m_data->m_useInterop = true;
|
||||
|
||||
g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo);
|
||||
clFinish(g_cqCommandQue);
|
||||
}
|
||||
|
||||
void CLPhysicsDemo::cleanup()
|
||||
{
|
||||
delete narrowphaseAndSolver;
|
||||
|
||||
delete m_data->m_linVelBuf;
|
||||
delete m_data->m_angVelBuf;
|
||||
delete m_data->m_bodyTimes;
|
||||
delete m_data->m_localShapeAABB;
|
||||
|
||||
delete m_data->m_Broadphase;
|
||||
delete m_data;
|
||||
|
||||
delete g_deviceCL->m_kernelManager;
|
||||
delete g_deviceCL;
|
||||
|
||||
m_data=0;
|
||||
g_deviceCL=0;
|
||||
delete g_interopBuffer;
|
||||
delete s_convexHeightField;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void CLPhysicsDemo::stepSimulation()
|
||||
{
|
||||
BT_PROFILE("simulationLoop");
|
||||
|
||||
{
|
||||
BT_PROFILE("glFinish");
|
||||
glFinish();
|
||||
}
|
||||
cl_int ciErrNum = CL_SUCCESS;
|
||||
|
||||
|
||||
if(m_data->m_useInterop)
|
||||
{
|
||||
clBuffer = g_interopBuffer->getCLBUffer();
|
||||
BT_PROFILE("clEnqueueAcquireGLObjects");
|
||||
ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
|
||||
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
||||
} else
|
||||
{
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
|
||||
glFlush();
|
||||
|
||||
BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");
|
||||
|
||||
blocking= CL_TRUE;
|
||||
hostPtr= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
|
||||
if (!clBuffer)
|
||||
{
|
||||
clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum);
|
||||
}
|
||||
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
ciErrNum = clEnqueueWriteBuffer ( g_cqCommandQue,
|
||||
clBuffer,
|
||||
blocking,
|
||||
0,
|
||||
VBOsize,
|
||||
hostPtr,0,0,0
|
||||
);
|
||||
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
||||
}
|
||||
|
||||
|
||||
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
if (runOpenCLKernels && m_numPhysicsInstances)
|
||||
{
|
||||
|
||||
gFpIO.m_numObjects = m_numPhysicsInstances;
|
||||
gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4;
|
||||
gFpIO.m_clObjectsBuffer = clBuffer;
|
||||
gFpIO.m_dAABB = m_data->m_Broadphase->m_dAABB;
|
||||
gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABB->m_ptr;
|
||||
gFpIO.m_numOverlap = 0;
|
||||
{
|
||||
BT_PROFILE("setupGpuAabbs");
|
||||
setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu() );
|
||||
}
|
||||
if (1)
|
||||
{
|
||||
BT_PROFILE("calculateOverlappingPairs");
|
||||
m_data->m_Broadphase->calculateOverlappingPairs(0, m_numPhysicsInstances);
|
||||
gFpIO.m_dAllOverlappingPairs = m_data->m_Broadphase->m_dAllOverlappingPairs;
|
||||
gFpIO.m_numOverlap = m_data->m_Broadphase->m_numPrefixSum;
|
||||
}
|
||||
|
||||
//printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
|
||||
if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
|
||||
{
|
||||
colorPairsOpenCL(gFpIO);
|
||||
|
||||
if (1)
|
||||
{
|
||||
{
|
||||
//BT_PROFILE("setupBodies");
|
||||
if (narrowphaseAndSolver)
|
||||
setupBodies(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
|
||||
}
|
||||
if (gFpIO.m_numOverlap)
|
||||
{
|
||||
BT_PROFILE("computeContactsAndSolver");
|
||||
if (narrowphaseAndSolver)
|
||||
narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap);
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("copyBodyVelocities");
|
||||
if (narrowphaseAndSolver)
|
||||
copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
|
||||
}
|
||||
}
|
||||
|
||||
} else
|
||||
{
|
||||
printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
|
||||
btAssert(0);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("integrateTransforms");
|
||||
|
||||
if (runOpenCLKernels)
|
||||
{
|
||||
int numObjects = m_numPhysicsInstances;
|
||||
int offset = SHAPE_VERTEX_BUFFER_SIZE/4;
|
||||
|
||||
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset);
|
||||
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects);
|
||||
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
|
||||
|
||||
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem);
|
||||
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem);
|
||||
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes);
|
||||
|
||||
|
||||
|
||||
|
||||
size_t workGroupSize = 64;
|
||||
size_t numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
|
||||
|
||||
if (workGroupSize>numWorkItems)
|
||||
workGroupSize=numWorkItems;
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
if(m_data->m_useInterop)
|
||||
{
|
||||
BT_PROFILE("clEnqueueReleaseGLObjects");
|
||||
ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
|
||||
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
||||
}
|
||||
else
|
||||
{
|
||||
BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");
|
||||
ciErrNum = clEnqueueReadBuffer ( g_cqCommandQue,
|
||||
clBuffer,
|
||||
blocking,
|
||||
0,
|
||||
VBOsize,
|
||||
hostPtr,0,0,0);
|
||||
|
||||
//clReleaseMemObject(clBuffer);
|
||||
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
||||
glUnmapBuffer( GL_ARRAY_BUFFER);
|
||||
glFlush();
|
||||
}
|
||||
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
|
||||
if (runOpenCLKernels)
|
||||
{
|
||||
BT_PROFILE("clFinish");
|
||||
clFinish(g_cqCommandQue);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#ifndef CL_PHYSICS_DEMO_H
|
||||
#define CL_PHYSICS_DEMO_H
|
||||
|
||||
class Win32OpenGLWindow;
|
||||
|
||||
struct CLPhysicsDemo
|
||||
{
|
||||
Win32OpenGLWindow* m_renderer;
|
||||
|
||||
int m_numCollisionShapes;
|
||||
|
||||
int m_numPhysicsInstances;
|
||||
|
||||
struct InternalData* m_data;
|
||||
|
||||
CLPhysicsDemo(Win32OpenGLWindow* renderer);
|
||||
|
||||
virtual ~CLPhysicsDemo();
|
||||
|
||||
//btOpenCLGLInteropBuffer* m_interopBuffer;
|
||||
|
||||
void init(int preferredDevice, int preferredPlatform, bool useInterop);
|
||||
|
||||
void setupInterop();
|
||||
|
||||
int registerCollisionShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling);
|
||||
|
||||
int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, void* userPointer);
|
||||
|
||||
void writeVelocitiesToGpu();
|
||||
void writeBodiesToGpu();
|
||||
|
||||
void cleanup();
|
||||
|
||||
void stepSimulation();
|
||||
};
|
||||
|
||||
#endif//CL_PHYSICS_DEMO_H
|
||||
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#ifndef DEMO_SETTINGS_H
|
||||
#define DEMO_SETTINGS_H
|
||||
|
||||
// Size of the shape vertex region of the instancing buffer. The value is
// parenthesized so that it expands safely inside larger expressions.
#define SHAPE_VERTEX_BUFFER_SIZE (1024*1024)

#define SHAPE_BUFFER_SIZE (SHAPE_VERTEX_BUFFER_SIZE)
|
||||
|
||||
|
||||
#endif //DEMO_SETTINGS_H
|
||||
@@ -0,0 +1,861 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#include "OpenGLInclude.h"
|
||||
#include "GLInstancingRenderer.h"
|
||||
|
||||
#include <string.h>
|
||||
#include "DemoSettings.h"
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include "LinearMath/btVector3.h"
|
||||
#include "LinearMath/btQuaternion.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "LinearMath/btMatrix3x3.h"
|
||||
|
||||
#include "../../opencl/gpu_rigidbody_pipeline/btGpuNarrowphaseAndSolver.h"//for MAX_CONVEX_BODIES_CL
|
||||
|
||||
struct btGraphicsInstance
|
||||
{
|
||||
GLuint m_cube_vao;
|
||||
GLuint m_index_vbo;
|
||||
int m_numIndices;
|
||||
int m_numVertices;
|
||||
|
||||
int m_numGraphicsInstances;
|
||||
|
||||
int m_instanceOffset;
|
||||
int m_vertexArrayOffset;
|
||||
|
||||
btGraphicsInstance() :m_cube_vao(-1),m_index_vbo(-1),m_numIndices(-1),m_numVertices(-1),m_numGraphicsInstances(0),m_instanceOffset(0),m_vertexArrayOffset(0)
|
||||
{
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
bool m_ortho = false;
|
||||
int m_glutScreenWidth = 1024;
|
||||
int m_glutScreenHeight = 768;
|
||||
|
||||
|
||||
|
||||
extern int gShapeIndex;
|
||||
|
||||
|
||||
btVector3 m_cameraPosition(0,0,0);//will be overridden by a position computed from azi/ele
|
||||
btVector3 m_cameraTargetPosition(30,-5,-20);
|
||||
btScalar m_cameraDistance = 95;
|
||||
btVector3 m_cameraUp(0,1,0);
|
||||
float m_azi=95.f;
|
||||
float m_ele=15.f;
|
||||
|
||||
|
||||
|
||||
|
||||
int VBOsize =0;
|
||||
|
||||
|
||||
|
||||
struct InternalDataRenderer
|
||||
{
|
||||
GLfloat* m_instance_positions_ptr;
|
||||
GLfloat* m_instance_quaternion_ptr;
|
||||
GLfloat* m_instance_colors_ptr;
|
||||
GLfloat* m_instance_scale_ptr;
|
||||
|
||||
InternalDataRenderer() :m_instance_positions_ptr (0),m_instance_quaternion_ptr(0),m_instance_colors_ptr(0),m_instance_scale_ptr(0)
|
||||
{
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
static GLuint instancingShader; // The instancing renderer
|
||||
|
||||
GLuint cube_vbo;
|
||||
|
||||
static GLuint m_texturehandle;
|
||||
|
||||
static bool done = false;
|
||||
static GLint angle_loc = 0;
|
||||
static GLint ModelViewMatrix;
|
||||
static GLint ProjectionMatrix;
|
||||
|
||||
|
||||
|
||||
// Allocates the per-instance attribute arrays for up to MAX_CONVEX_BODIES_CL
// instances: 4 floats each for position, quaternion and color, 3 for scale.
GLInstancingRenderer::GLInstancingRenderer()
{
	const int maxInstances = MAX_CONVEX_BODIES_CL;

	InternalDataRenderer* data = new InternalDataRenderer;
	data->m_instance_positions_ptr = (GLfloat*)new float[maxInstances*4];
	data->m_instance_quaternion_ptr = (GLfloat*)new float[maxInstances*4];
	data->m_instance_colors_ptr = (GLfloat*)new float[maxInstances*4];
	data->m_instance_scale_ptr = (GLfloat*)new float[maxInstances*3];

	m_data = data;
}
|
||||
|
||||
// Deletes the InternalDataRenderer holder only.
// NOTE(review): the four instance arrays allocated in the constructor are not
// released here - confirm they are freed elsewhere before adding a delete[].
GLInstancingRenderer::~GLInstancingRenderer()
{
	delete m_data;
}
|
||||
|
||||
|
||||
static GLint uniform_texture_diffuse = 0;
|
||||
|
||||
//used for dynamic loading from disk (default switched off)
|
||||
#define MAX_SHADER_LENGTH 8192
|
||||
static GLubyte shaderText[MAX_SHADER_LENGTH];
|
||||
|
||||
static const char* vertexShader= \
|
||||
"#version 330\n"
|
||||
"precision highp float;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"layout (location = 0) in vec4 position;\n"
|
||||
"layout (location = 1) in vec4 instance_position;\n"
|
||||
"layout (location = 2) in vec4 instance_quaternion;\n"
|
||||
"layout (location = 3) in vec2 uvcoords;\n"
|
||||
"layout (location = 4) in vec3 vertexnormal;\n"
|
||||
"layout (location = 5) in vec4 instance_color;\n"
|
||||
"layout (location = 6) in vec3 instance_scale;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"uniform float angle = 0.0;\n"
|
||||
"uniform mat4 ModelViewMatrix;\n"
|
||||
"uniform mat4 ProjectionMatrix;\n"
|
||||
"\n"
|
||||
"out Fragment\n"
|
||||
"{\n"
|
||||
" vec4 color;\n"
|
||||
"} fragment;\n"
|
||||
"\n"
|
||||
"out Vert\n"
|
||||
"{\n"
|
||||
" vec2 texcoord;\n"
|
||||
"} vert;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"vec4 quatMul ( in vec4 q1, in vec4 q2 )\n"
|
||||
"{\n"
|
||||
" vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n"
|
||||
" vec4 dt = q1 * q2;\n"
|
||||
" float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n"
|
||||
" return vec4 ( im, re );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"vec4 quatFromAxisAngle(vec4 axis, in float angle)\n"
|
||||
"{\n"
|
||||
" float cah = cos(angle*0.5);\n"
|
||||
" float sah = sin(angle*0.5);\n"
|
||||
" float d = inversesqrt(dot(axis,axis));\n"
|
||||
" vec4 q = vec4(axis.x*sah*d,axis.y*sah*d,axis.z*sah*d,cah);\n"
|
||||
" return q;\n"
|
||||
"}\n"
|
||||
"//\n"
|
||||
"// vector rotation via quaternion\n"
|
||||
"//\n"
|
||||
"vec4 quatRotate3 ( in vec3 p, in vec4 q )\n"
|
||||
"{\n"
|
||||
" vec4 temp = quatMul ( q, vec4 ( p, 0.0 ) );\n"
|
||||
" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n"
|
||||
"}\n"
|
||||
"vec4 quatRotate ( in vec4 p, in vec4 q )\n"
|
||||
"{\n"
|
||||
" vec4 temp = quatMul ( q, p );\n"
|
||||
" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"out vec3 lightDir,normal,ambient;\n"
|
||||
"\n"
|
||||
"void main(void)\n"
|
||||
"{\n"
|
||||
" vec4 q = instance_quaternion;\n"
|
||||
" ambient = vec3(0.3,.3,0.3);\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" vec4 local_normal = (quatRotate3( vertexnormal,q));\n"
|
||||
" vec3 light_pos = vec3(-0.8,1,-0.6);\n"
|
||||
" normal = local_normal.xyz;\n"//normalize(ModelViewMatrix * local_normal).xyz;\n"
|
||||
"\n"
|
||||
" lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n"
|
||||
"// lightDir = normalize(vec3(gl_LightSource[0].position));\n"
|
||||
" \n"
|
||||
" vec4 axis = vec4(1,1,1,0);\n"
|
||||
" vec4 localcoord = quatRotate3( position.xyz*instance_scale,q);\n"
|
||||
" vec4 vertexPos = ProjectionMatrix * ModelViewMatrix *(instance_position+localcoord);\n"
|
||||
"\n"
|
||||
" gl_Position = vertexPos;\n"
|
||||
" \n"
|
||||
" fragment.color = instance_color;\n"
|
||||
" vert.texcoord = uvcoords;\n"
|
||||
"}\n"
|
||||
;
|
||||
|
||||
|
||||
// GLSL 3.30 fragment shader used together with vertexShader by the instancing
// renderer. It multiplies the interpolated per-instance colour with the
// "Diffuse" texture and scales the result by a clamped N.L term (minimum 0.2)
// plus the ambient colour received from the vertex stage.
// texture() is used for sampling because texture2D() is not part of the GLSL
// 3.30 core profile, which is what a bare "#version 330" selects; strict
// compilers reject the old name.
static const char* fragmentShader=
"#version 330\n"
"precision highp float;\n"
"\n"
"in Fragment\n"
"{\n"
" vec4 color;\n"
"} fragment;\n"
"\n"
"in Vert\n"
"{\n"
" vec2 texcoord;\n"
"} vert;\n"
"\n"
"uniform sampler2D Diffuse;\n"
"\n"
"in vec3 lightDir,normal,ambient;\n"
"\n"
"out vec4 color;\n"
"\n"
"void main_textured(void)\n"
"{\n"
" color = texture(Diffuse,vert.texcoord);//fragment.color;\n"
"}\n"
"\n"
"void main(void)\n"
"{\n"
" vec4 texel = fragment.color*texture(Diffuse,vert.texcoord);//fragment.color;\n"
" vec3 ct,cf;\n"
" float intensity,at,af;\n"
" intensity = max(dot(lightDir,normalize(normal)),.2);\n"
" cf = intensity*vec3(1.0,1.0,1.0)+ambient;\n"
" af = 1.0;\n"
" \n"
" ct = texel.rgb;\n"
" at = texel.a;\n"
" \n"
" color = vec4(ct * cf, at * af); \n"
"}\n"
;
|
||||
|
||||
|
||||
// Load the shader from the source text
|
||||
void gltLoadShaderSrc(const char *szShaderSrc, GLuint shader)
|
||||
{
|
||||
GLchar *fsStringPtr[1];
|
||||
|
||||
fsStringPtr[0] = (GLchar *)szShaderSrc;
|
||||
glShaderSource(shader, 1, (const GLchar **)fsStringPtr, NULL);
|
||||
}
|
||||
|
||||
|
||||
GLuint gltLoadShaderPair(const char *szVertexProg, const char *szFragmentProg)
|
||||
{
|
||||
// Temporary Shader objects
|
||||
GLuint hVertexShader;
|
||||
GLuint hFragmentShader;
|
||||
GLuint hReturn = 0;
|
||||
GLint testVal;
|
||||
|
||||
// Create shader objects
|
||||
hVertexShader = glCreateShader(GL_VERTEX_SHADER);
|
||||
hFragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
|
||||
|
||||
gltLoadShaderSrc(vertexShader, hVertexShader);
|
||||
gltLoadShaderSrc(fragmentShader, hFragmentShader);
|
||||
|
||||
// Compile them
|
||||
glCompileShader(hVertexShader);
|
||||
glCompileShader(hFragmentShader);
|
||||
|
||||
// Check for errors
|
||||
glGetShaderiv(hVertexShader, GL_COMPILE_STATUS, &testVal);
|
||||
if(testVal == GL_FALSE)
|
||||
{
|
||||
char temp[256] = "";
|
||||
glGetShaderInfoLog( hVertexShader, 256, NULL, temp);
|
||||
fprintf( stderr, "Compile failed:\n%s\n", temp);
|
||||
assert(0);
|
||||
exit(0);
|
||||
glDeleteShader(hVertexShader);
|
||||
glDeleteShader(hFragmentShader);
|
||||
return (GLuint)NULL;
|
||||
}
|
||||
|
||||
glGetShaderiv(hFragmentShader, GL_COMPILE_STATUS, &testVal);
|
||||
if(testVal == GL_FALSE)
|
||||
{
|
||||
char temp[256] = "";
|
||||
glGetShaderInfoLog( hFragmentShader, 256, NULL, temp);
|
||||
fprintf( stderr, "Compile failed:\n%s\n", temp);
|
||||
assert(0);
|
||||
exit(0);
|
||||
glDeleteShader(hVertexShader);
|
||||
glDeleteShader(hFragmentShader);
|
||||
return (GLuint)NULL;
|
||||
}
|
||||
|
||||
// Link them - assuming it works...
|
||||
hReturn = glCreateProgram();
|
||||
glAttachShader(hReturn, hVertexShader);
|
||||
glAttachShader(hReturn, hFragmentShader);
|
||||
|
||||
glLinkProgram(hReturn);
|
||||
|
||||
// These are no longer needed
|
||||
glDeleteShader(hVertexShader);
|
||||
glDeleteShader(hFragmentShader);
|
||||
|
||||
// Make sure link worked too
|
||||
glGetProgramiv(hReturn, GL_LINK_STATUS, &testVal);
|
||||
if(testVal == GL_FALSE)
|
||||
{
|
||||
glDeleteProgram(hReturn);
|
||||
return (GLuint)NULL;
|
||||
}
|
||||
|
||||
return hReturn;
|
||||
}
|
||||
|
||||
|
||||
void GLInstancingRenderer::writeTransforms()
|
||||
{
|
||||
glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
|
||||
glFlush();
|
||||
|
||||
char* orgBase = (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);
|
||||
|
||||
int totalNumInstances= 0;
|
||||
|
||||
for (int k=0;k<m_graphicsInstances.size();k++)
|
||||
{
|
||||
btGraphicsInstance* gfxObj = m_graphicsInstances[k];
|
||||
totalNumInstances+=gfxObj->m_numGraphicsInstances;
|
||||
}
|
||||
|
||||
|
||||
|
||||
for (int k=0;k<m_graphicsInstances.size();k++)
|
||||
{
|
||||
//int k=0;
|
||||
btGraphicsInstance* gfxObj = m_graphicsInstances[k];
|
||||
|
||||
|
||||
|
||||
int POSITION_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4);
|
||||
int ORIENTATION_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4);
|
||||
int COLOR_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4);
|
||||
int SCALE_BUFFER_SIZE = (totalNumInstances*sizeof(float)*3);
|
||||
|
||||
char* base = orgBase;
|
||||
|
||||
float* positions = (float*)(base+SHAPE_BUFFER_SIZE);
|
||||
float* orientations = (float*)(base+SHAPE_BUFFER_SIZE + POSITION_BUFFER_SIZE);
|
||||
float* colors= (float*)(base+SHAPE_BUFFER_SIZE + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE);
|
||||
float* scaling= (float*)(base+SHAPE_BUFFER_SIZE + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE);
|
||||
|
||||
static int offset=0;
|
||||
//offset++;
|
||||
|
||||
|
||||
for (int i=0;i<gfxObj->m_numGraphicsInstances;i++)
|
||||
{
|
||||
|
||||
int srcIndex=i+gfxObj->m_instanceOffset;
|
||||
|
||||
positions[srcIndex*4] = m_data->m_instance_positions_ptr[srcIndex*4];
|
||||
positions[srcIndex*4+1] = m_data->m_instance_positions_ptr[srcIndex*4+1];
|
||||
positions[srcIndex*4+2] = m_data->m_instance_positions_ptr[srcIndex*4+2];
|
||||
positions[srcIndex*4+3] = m_data->m_instance_positions_ptr[srcIndex*4+3];
|
||||
|
||||
orientations[srcIndex*4]=m_data->m_instance_quaternion_ptr[srcIndex*4];
|
||||
orientations[srcIndex*4+1]=m_data->m_instance_quaternion_ptr[srcIndex*4+1];
|
||||
orientations[srcIndex*4+2]=m_data->m_instance_quaternion_ptr[srcIndex*4+2];
|
||||
orientations[srcIndex*4+3]=m_data->m_instance_quaternion_ptr[srcIndex*4+3];
|
||||
|
||||
colors[srcIndex*4]=m_data->m_instance_colors_ptr[srcIndex*4];
|
||||
colors[srcIndex*4+1]=m_data->m_instance_colors_ptr[srcIndex*4+1];
|
||||
colors[srcIndex*4+2]=m_data->m_instance_colors_ptr[srcIndex*4+2];
|
||||
colors[srcIndex*4+3]=m_data->m_instance_colors_ptr[srcIndex*4+3];
|
||||
|
||||
scaling[srcIndex*3]=m_data->m_instance_scale_ptr[srcIndex*3];
|
||||
scaling[srcIndex*3+1]=m_data->m_instance_scale_ptr[srcIndex*3+1];
|
||||
scaling[srcIndex*3+2]=m_data->m_instance_scale_ptr[srcIndex*3+2];
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
glUnmapBuffer( GL_ARRAY_BUFFER);
|
||||
//if this glFinish is removed, the animation is not always working/blocks
|
||||
//@todo: figure out why
|
||||
glFlush();
|
||||
}
|
||||
|
||||
int GLInstancingRenderer::registerGraphicsInstance(int shapeIndex, const float* position, const float* quaternion, const float* color, const float* scaling)
|
||||
{
|
||||
btGraphicsInstance* gfxObj = m_graphicsInstances[shapeIndex];
|
||||
|
||||
int index = gfxObj->m_numGraphicsInstances + gfxObj->m_instanceOffset;
|
||||
|
||||
|
||||
|
||||
m_data->m_instance_positions_ptr[index*4]=position[0];
|
||||
m_data->m_instance_positions_ptr[index*4+1]=position[1];
|
||||
m_data->m_instance_positions_ptr[index*4+2]=position[2];
|
||||
m_data->m_instance_positions_ptr[index*4+3]=1;
|
||||
|
||||
m_data->m_instance_quaternion_ptr[index*4]=quaternion[0];
|
||||
m_data->m_instance_quaternion_ptr[index*4+1]=quaternion[1];
|
||||
m_data->m_instance_quaternion_ptr[index*4+2]=quaternion[2];
|
||||
m_data->m_instance_quaternion_ptr[index*4+3]=quaternion[3];
|
||||
|
||||
m_data->m_instance_colors_ptr[index*4]=color[0];
|
||||
m_data->m_instance_colors_ptr[index*4+1]=color[1];
|
||||
m_data->m_instance_colors_ptr[index*4+2]=color[2];
|
||||
m_data->m_instance_colors_ptr[index*4+3]=color[3];
|
||||
|
||||
m_data->m_instance_scale_ptr[index*3] = scaling[0];
|
||||
m_data->m_instance_scale_ptr[index*3+1] = scaling[1];
|
||||
m_data->m_instance_scale_ptr[index*3+2] = scaling[2];
|
||||
|
||||
gfxObj->m_numGraphicsInstances++;
|
||||
return gfxObj->m_numGraphicsInstances;
|
||||
}
|
||||
|
||||
|
||||
int GLInstancingRenderer::registerShape(const float* vertices, int numvertices, const int* indices, int numIndices)
|
||||
{
|
||||
btGraphicsInstance* gfxObj = new btGraphicsInstance;
|
||||
|
||||
if (m_graphicsInstances.size())
|
||||
{
|
||||
btGraphicsInstance* prevObj = m_graphicsInstances[m_graphicsInstances.size()-1];
|
||||
gfxObj->m_instanceOffset = prevObj->m_instanceOffset + prevObj->m_numGraphicsInstances;
|
||||
gfxObj->m_vertexArrayOffset = prevObj->m_vertexArrayOffset + prevObj->m_numVertices;
|
||||
} else
|
||||
{
|
||||
gfxObj->m_instanceOffset = 0;
|
||||
}
|
||||
|
||||
m_graphicsInstances.push_back(gfxObj);
|
||||
gfxObj->m_numIndices = numIndices;
|
||||
gfxObj->m_numVertices = numvertices;
|
||||
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
|
||||
char* dest= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_WRITE_ONLY);//GL_WRITE_ONLY
|
||||
int vertexStrideInBytes = 9*sizeof(float);
|
||||
int sz = numvertices*vertexStrideInBytes;
|
||||
memcpy(dest+vertexStrideInBytes*gfxObj->m_vertexArrayOffset,vertices,sz);
|
||||
glUnmapBuffer( GL_ARRAY_BUFFER);
|
||||
|
||||
glGenBuffers(1, &gfxObj->m_index_vbo);
|
||||
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gfxObj->m_index_vbo);
|
||||
int indexBufferSizeInBytes = gfxObj->m_numIndices*sizeof(int);
|
||||
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, indexBufferSizeInBytes, NULL, GL_STATIC_DRAW);
|
||||
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER,0,indexBufferSizeInBytes,indices);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
|
||||
glGenVertexArrays(1, &gfxObj->m_cube_vao);
|
||||
glBindVertexArray(gfxObj->m_cube_vao);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
|
||||
glBindVertexArray(0);
|
||||
glBindBuffer(GL_ARRAY_BUFFER,0);
|
||||
glBindVertexArray(0);
|
||||
|
||||
|
||||
return m_graphicsInstances.size()-1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void GLInstancingRenderer::InitShaders()
|
||||
{
|
||||
|
||||
int POSITION_BUFFER_SIZE = (MAX_CONVEX_BODIES_CL*sizeof(float)*4);
|
||||
int ORIENTATION_BUFFER_SIZE = (MAX_CONVEX_BODIES_CL*sizeof(float)*4);
|
||||
int COLOR_BUFFER_SIZE = (MAX_CONVEX_BODIES_CL*sizeof(float)*4);
|
||||
int SCALE_BUFFER_SIZE = (MAX_CONVEX_BODIES_CL*sizeof(float)*3);
|
||||
|
||||
|
||||
instancingShader = gltLoadShaderPair(vertexShader,fragmentShader);
|
||||
|
||||
glLinkProgram(instancingShader);
|
||||
glUseProgram(instancingShader);
|
||||
angle_loc = glGetUniformLocation(instancingShader, "angle");
|
||||
ModelViewMatrix = glGetUniformLocation(instancingShader, "ModelViewMatrix");
|
||||
ProjectionMatrix = glGetUniformLocation(instancingShader, "ProjectionMatrix");
|
||||
uniform_texture_diffuse = glGetUniformLocation(instancingShader, "Diffuse");
|
||||
|
||||
GLuint offset = 0;
|
||||
|
||||
|
||||
glGenBuffers(1, &cube_vbo);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
|
||||
|
||||
|
||||
int size = SHAPE_BUFFER_SIZE + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE+SCALE_BUFFER_SIZE;
|
||||
VBOsize = size;
|
||||
|
||||
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);//GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER,0);
|
||||
glBindVertexArray(0);
|
||||
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void myinit()
|
||||
{
|
||||
GLint err = glGetError();
|
||||
|
||||
// GLfloat light_ambient[] = { btScalar(0.2), btScalar(0.2), btScalar(0.2), btScalar(1.0) };
|
||||
GLfloat light_ambient[] = { btScalar(1.0), btScalar(1.2), btScalar(0.2), btScalar(1.0) };
|
||||
|
||||
GLfloat light_diffuse[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0) };
|
||||
GLfloat light_specular[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0 )};
|
||||
/* light_position is NOT default value */
|
||||
GLfloat light_position0[] = { btScalar(10000.0), btScalar(10000.0), btScalar(10000.0), btScalar(0.0 )};
|
||||
GLfloat light_position1[] = { btScalar(-1.0), btScalar(-10.0), btScalar(-1.0), btScalar(0.0) };
|
||||
|
||||
glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient);
|
||||
glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse);
|
||||
glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular);
|
||||
glLightfv(GL_LIGHT0, GL_POSITION, light_position0);
|
||||
|
||||
glLightfv(GL_LIGHT1, GL_AMBIENT, light_ambient);
|
||||
glLightfv(GL_LIGHT1, GL_DIFFUSE, light_diffuse);
|
||||
glLightfv(GL_LIGHT1, GL_SPECULAR, light_specular);
|
||||
glLightfv(GL_LIGHT1, GL_POSITION, light_position1);
|
||||
|
||||
glEnable(GL_LIGHTING);
|
||||
glEnable(GL_LIGHT0);
|
||||
glEnable(GL_LIGHT1);
|
||||
|
||||
|
||||
// glShadeModel(GL_FLAT);//GL_SMOOTH);
|
||||
glShadeModel(GL_SMOOTH);
|
||||
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
glDepthFunc(GL_LESS);
|
||||
|
||||
glClearColor(float(0.7),float(0.7),float(0.7),float(0));
|
||||
glEnable(GL_LIGHTING);
|
||||
glEnable(GL_LIGHT0);
|
||||
|
||||
|
||||
static bool m_textureenabled = true;
|
||||
static bool m_textureinitialized = false;
|
||||
|
||||
|
||||
if(m_textureenabled)
|
||||
{
|
||||
if(!m_textureinitialized)
|
||||
{
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
|
||||
GLubyte* image=new GLubyte[256*256*3];
|
||||
for(int y=0;y<256;++y)
|
||||
{
|
||||
const int t=y>>5;
|
||||
GLubyte* pi=image+y*256*3;
|
||||
for(int x=0;x<256;++x)
|
||||
{
|
||||
if (x<2||y<2||x>253||y>253)
|
||||
{
|
||||
pi[0]=0;
|
||||
pi[1]=0;
|
||||
pi[2]=0;
|
||||
} else
|
||||
{
|
||||
pi[0]=255;
|
||||
pi[1]=255;
|
||||
pi[2]=255;
|
||||
}
|
||||
|
||||
/*
|
||||
const int s=x>>5;
|
||||
const GLubyte b=180;
|
||||
GLubyte c=b+((s+t&1)&1)*(255-b);
|
||||
pi[0]=c;
|
||||
pi[1]=c;
|
||||
pi[2]=c;
|
||||
*/
|
||||
|
||||
pi+=3;
|
||||
}
|
||||
}
|
||||
|
||||
glGenTextures(1,(GLuint*)&m_texturehandle);
|
||||
glBindTexture(GL_TEXTURE_2D,m_texturehandle);
|
||||
glTexEnvf(GL_TEXTURE_ENV,GL_TEXTURE_ENV_MODE,GL_MODULATE);
|
||||
glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR_MIPMAP_LINEAR);
|
||||
glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR);
|
||||
glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT);
|
||||
glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT);
|
||||
gluBuild2DMipmaps(GL_TEXTURE_2D,3,256,256,GL_RGB,GL_UNSIGNED_BYTE,image);
|
||||
delete[] image;
|
||||
m_textureinitialized=true;
|
||||
}
|
||||
// glMatrixMode(GL_TEXTURE);
|
||||
// glLoadIdentity();
|
||||
// glMatrixMode(GL_MODELVIEW);
|
||||
|
||||
glEnable(GL_TEXTURE_2D);
|
||||
glBindTexture(GL_TEXTURE_2D,m_texturehandle);
|
||||
|
||||
} else
|
||||
{
|
||||
glDisable(GL_TEXTURE_2D);
|
||||
}
|
||||
|
||||
glEnable(GL_COLOR_MATERIAL);
|
||||
|
||||
err = glGetError();
|
||||
assert(err==GL_NO_ERROR);
|
||||
|
||||
// glEnable(GL_CULL_FACE);
|
||||
// glCullFace(GL_BACK);
|
||||
}
|
||||
|
||||
void updateCamera()
|
||||
{
|
||||
|
||||
|
||||
|
||||
btVector3 m_cameraUp(0,1,0);
|
||||
int m_forwardAxis=2;
|
||||
|
||||
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glLoadIdentity();
|
||||
|
||||
|
||||
//m_azi+=0.0f;
|
||||
|
||||
btScalar rele = m_ele * btScalar(0.01745329251994329547);// rads per deg
|
||||
btScalar razi = m_azi * btScalar(0.01745329251994329547);// rads per deg
|
||||
|
||||
|
||||
btQuaternion rot(m_cameraUp,razi);
|
||||
|
||||
|
||||
btVector3 eyePos(0,0,0);
|
||||
eyePos[m_forwardAxis] = -m_cameraDistance;
|
||||
|
||||
btVector3 forward(eyePos[0],eyePos[1],eyePos[2]);
|
||||
if (forward.length2() < SIMD_EPSILON)
|
||||
{
|
||||
forward.setValue(1.f,0.f,0.f);
|
||||
}
|
||||
btVector3 right = m_cameraUp.cross(forward);
|
||||
btQuaternion roll(right,-rele);
|
||||
|
||||
eyePos = btMatrix3x3(rot) * btMatrix3x3(roll) * eyePos;
|
||||
|
||||
m_cameraPosition[0] = eyePos.getX();
|
||||
m_cameraPosition[1] = eyePos.getY();
|
||||
m_cameraPosition[2] = eyePos.getZ();
|
||||
m_cameraPosition += m_cameraTargetPosition;
|
||||
|
||||
|
||||
float m_frustumZNear=1;
|
||||
float m_frustumZFar=1000;
|
||||
|
||||
if (m_glutScreenWidth == 0 && m_glutScreenHeight == 0)
|
||||
return;
|
||||
|
||||
float aspect;
|
||||
btVector3 extents;
|
||||
|
||||
if (m_glutScreenWidth > m_glutScreenHeight)
|
||||
{
|
||||
aspect = m_glutScreenWidth / (float)m_glutScreenHeight;
|
||||
extents.setValue(aspect * 1.0f, 1.0f,0);
|
||||
} else
|
||||
{
|
||||
aspect = m_glutScreenHeight / (float)m_glutScreenWidth;
|
||||
extents.setValue(1.0f, aspect*1.f,0);
|
||||
}
|
||||
|
||||
|
||||
if (m_ortho)
|
||||
{
|
||||
// reset matrix
|
||||
glLoadIdentity();
|
||||
extents *= m_cameraDistance;
|
||||
btVector3 lower = m_cameraTargetPosition - extents;
|
||||
btVector3 upper = m_cameraTargetPosition + extents;
|
||||
glOrtho(lower.getX(), upper.getX(), lower.getY(), upper.getY(),-1000,1000);
|
||||
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glLoadIdentity();
|
||||
} else
|
||||
{
|
||||
if (m_glutScreenWidth > m_glutScreenHeight)
|
||||
{
|
||||
glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar);
|
||||
} else
|
||||
{
|
||||
glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar);
|
||||
}
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glLoadIdentity();
|
||||
gluLookAt(m_cameraPosition[0], m_cameraPosition[1], m_cameraPosition[2],
|
||||
m_cameraTargetPosition[0], m_cameraTargetPosition[1], m_cameraTargetPosition[2],
|
||||
m_cameraUp.getX(),m_cameraUp.getY(),m_cameraUp.getZ());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void GLInstancingRenderer::RenderScene(void)
|
||||
{
|
||||
BT_PROFILE("GlutDisplayFunc");
|
||||
|
||||
myinit();
|
||||
|
||||
updateCamera();
|
||||
|
||||
//render coordinate system
|
||||
glBegin(GL_LINES);
|
||||
glColor3f(1,0,0);
|
||||
glVertex3f(0,0,0);
|
||||
glVertex3f(1,0,0);
|
||||
glColor3f(0,1,0);
|
||||
glVertex3f(0,0,0);
|
||||
glVertex3f(0,1,0);
|
||||
glColor3f(0,0,1);
|
||||
glVertex3f(0,0,0);
|
||||
glVertex3f(0,0,1);
|
||||
glEnd();
|
||||
|
||||
//do a finish, to make sure timings are clean
|
||||
// glFinish();
|
||||
|
||||
|
||||
|
||||
// glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
|
||||
glFlush();
|
||||
|
||||
//updatePos();
|
||||
|
||||
// simulationLoop();
|
||||
|
||||
//useCPU = true;
|
||||
|
||||
int totalNumInstances = 0;
|
||||
|
||||
for (int i=0;i<m_graphicsInstances.size();i++)
|
||||
{
|
||||
totalNumInstances+=m_graphicsInstances[i]->m_numGraphicsInstances;
|
||||
}
|
||||
|
||||
int curOffset = 0;
|
||||
|
||||
for (int i=0;i<m_graphicsInstances.size();i++)
|
||||
{
|
||||
|
||||
btGraphicsInstance* gfxObj = m_graphicsInstances[i];
|
||||
int myOffset = gfxObj->m_instanceOffset*4*sizeof(float);
|
||||
|
||||
int POSITION_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4);
|
||||
int ORIENTATION_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4);
|
||||
int COLOR_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4);
|
||||
int SCALE_BUFFER_SIZE = (totalNumInstances*sizeof(float)*3);
|
||||
|
||||
glBindVertexArray(gfxObj->m_cube_vao);
|
||||
|
||||
|
||||
int vertexStride = 9*sizeof(float);
|
||||
int vertexBase = gfxObj->m_vertexArrayOffset*vertexStride;
|
||||
|
||||
glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid*)vertexBase);
|
||||
glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(curOffset*4*sizeof(float)+SHAPE_BUFFER_SIZE));
|
||||
glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(curOffset*4*sizeof(float)+SHAPE_BUFFER_SIZE+POSITION_BUFFER_SIZE));
|
||||
int uvoffset = 7*sizeof(float)+vertexBase;
|
||||
int normaloffset = 4*sizeof(float)+vertexBase;
|
||||
|
||||
glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)uvoffset);
|
||||
glVertexAttribPointer(4, 3, GL_FLOAT, GL_FALSE, 9*sizeof(float), (GLvoid *)normaloffset);
|
||||
glVertexAttribPointer(5, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(curOffset*4*sizeof(float)+SHAPE_BUFFER_SIZE+POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE));
|
||||
glVertexAttribPointer(6, 3, GL_FLOAT, GL_FALSE, 0, (GLvoid *)(curOffset*3*sizeof(float)+SHAPE_BUFFER_SIZE+POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE+COLOR_BUFFER_SIZE));
|
||||
|
||||
glEnableVertexAttribArray(0);
|
||||
glEnableVertexAttribArray(1);
|
||||
glEnableVertexAttribArray(2);
|
||||
glEnableVertexAttribArray(3);
|
||||
glEnableVertexAttribArray(4);
|
||||
glEnableVertexAttribArray(5);
|
||||
glEnableVertexAttribArray(6);
|
||||
|
||||
glVertexAttribDivisor(0, 0);
|
||||
glVertexAttribDivisor(1, 1);
|
||||
glVertexAttribDivisor(2, 1);
|
||||
glVertexAttribDivisor(3, 0);
|
||||
glVertexAttribDivisor(4, 0);
|
||||
glVertexAttribDivisor(5, 1);
|
||||
glVertexAttribDivisor(6, 1);
|
||||
|
||||
glUseProgram(instancingShader);
|
||||
glUniform1f(angle_loc, 0);
|
||||
GLfloat pm[16];
|
||||
glGetFloatv(GL_PROJECTION_MATRIX, pm);
|
||||
glUniformMatrix4fv(ProjectionMatrix, 1, false, &pm[0]);
|
||||
|
||||
GLfloat mvm[16];
|
||||
glGetFloatv(GL_MODELVIEW_MATRIX, mvm);
|
||||
glUniformMatrix4fv(ModelViewMatrix, 1, false, &mvm[0]);
|
||||
|
||||
glUniform1i(uniform_texture_diffuse, 0);
|
||||
|
||||
glFlush();
|
||||
|
||||
if (gfxObj->m_numGraphicsInstances)
|
||||
{
|
||||
int indexCount = gfxObj->m_numIndices;
|
||||
int indexOffset = 0;
|
||||
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gfxObj->m_index_vbo);
|
||||
{
|
||||
BT_PROFILE("glDrawElementsInstanced");
|
||||
glDrawElementsInstanced(GL_TRIANGLES, indexCount, GL_UNSIGNED_INT, (void*)indexOffset, gfxObj->m_numGraphicsInstances);
|
||||
}
|
||||
}
|
||||
curOffset+= gfxObj->m_numGraphicsInstances;
|
||||
}
|
||||
glUseProgram(0);
|
||||
glBindBuffer(GL_ARRAY_BUFFER,0);
|
||||
glBindVertexArray(0);
|
||||
|
||||
|
||||
GLint err = glGetError();
|
||||
assert(err==GL_NO_ERROR);
|
||||
}
|
||||
|
||||
|
||||
void GLInstancingRenderer::CleanupShaders()
|
||||
{
|
||||
|
||||
delete []m_data->m_instance_positions_ptr;
|
||||
delete []m_data->m_instance_quaternion_ptr;
|
||||
delete []m_data->m_instance_colors_ptr;
|
||||
delete []m_data->m_instance_scale_ptr;
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#ifndef GL_INSTANCING_RENDERER_H
|
||||
#define GL_INSTANCING_RENDERER_H
|
||||
|
||||
#include "LinearMath/btAlignedObjectArray.h"
|
||||
|
||||
///Renders many instances of a small set of shapes with OpenGL instancing.
///Shapes are added with registerShape, instances of them with registerGraphicsInstance;
///writeTransforms uploads the per-instance data and RenderScene draws everything.
class GLInstancingRenderer
|
||||
{
|
||||
|
||||
///one entry per registered shape
btAlignedObjectArray<struct btGraphicsInstance*> m_graphicsInstances;
|
||||
|
||||
///holds the CPU-side per-instance arrays (positions, quaternions, colors, scales)
struct InternalDataRenderer* m_data;
|
||||
|
||||
public:
|
||||
GLInstancingRenderer();
|
||||
virtual ~GLInstancingRenderer();
|
||||
|
||||
///builds the shader program and allocates the shared vertex buffer
void InitShaders();
|
||||
///draws the coordinate axes and all registered instances
void RenderScene(void);
|
||||
///frees the CPU-side per-instance arrays
void CleanupShaders();
|
||||
|
||||
///vertices must be in the format x,y,z,w, nx,ny,nz, u,v (9 floats per vertex)
///returns the index of the new shape
|
||||
int registerShape(const float* vertices, int numvertices, const int* indices, int numIndices);
|
||||
|
||||
///position x,y,z, quaternion x,y,z,w, color r,g,b,a, scaling x,y,z
///returns the number of instances of this shape after the new one was added
|
||||
int registerGraphicsInstance(int shapeIndex, const float* position, const float* quaternion, const float* color, const float* scaling);
|
||||
|
||||
///copies the per-instance arrays into the mapped vertex buffer
void writeTransforms();
|
||||
};
|
||||
|
||||
#endif //GL_INSTANCING_RENDERER_H
|
||||
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
|
||||
#include <GL/glew.h>
|
||||
#include "GlutRenderer.h"
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
// The renderer that receives all GLUT callbacks; assigned in the GlutRenderer constructor.
GlutRenderer* GlutRenderer::gDemoApplication = 0;
|
||||
|
||||
|
||||
|
||||
void GlutRenderer::runMainLoop()
|
||||
{
|
||||
glutMainLoop();
|
||||
|
||||
}
|
||||
|
||||
|
||||
static void glutKeyboardCallback(unsigned char key, int x, int y) { GlutRenderer::gDemoApplication->keyboardCallback(key,x,y); }
|
||||
static void glutKeyboardUpCallback(unsigned char key, int x, int y){ GlutRenderer::gDemoApplication->keyboardUpCallback(key,x,y);}
|
||||
static void glutSpecialKeyboardCallback(int key, int x, int y){ GlutRenderer::gDemoApplication->specialKeyboard(key,x,y);}
|
||||
static void glutSpecialKeyboardUpCallback(int key, int x, int y){ GlutRenderer::gDemoApplication->specialKeyboardUp(key,x,y);}
|
||||
static void glutReshapeCallback(int w, int h){ GlutRenderer::gDemoApplication->resize(w,h);}
|
||||
static void glutIdleCallback(){ glutPostRedisplay (); }
|
||||
static void glutMouseFuncCallback(int button, int state, int x, int y){ GlutRenderer::gDemoApplication->mouseFunc(button,state,x,y);}
|
||||
static void glutMotionFuncCallback(int x,int y){ GlutRenderer::gDemoApplication->mouseMotionFunc(x,y);}
|
||||
static void glutDisplayCallback(void){ GlutRenderer::gDemoApplication->displayCallback();}
|
||||
|
||||
|
||||
void GlutRenderer::resize(int width, int height)
|
||||
{
|
||||
m_glutScreenWidth = width;
|
||||
m_glutScreenHeight = height;
|
||||
}
|
||||
|
||||
// Mouse button handler; the base implementation does nothing.
void GlutRenderer::mouseFunc(int button, int state, int x, int y)
{
	(void)button;
	(void)state;
	(void)x;
	(void)y;
}
|
||||
// Mouse motion handler (registered for both passive and dragging motion);
// the base implementation does nothing.
void GlutRenderer::mouseMotionFunc(int x,int y)
{
	(void)x;
	(void)y;
}
|
||||
|
||||
void GlutRenderer::renderScene()
|
||||
{
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
glutSwapBuffers();
|
||||
glutPostRedisplay();
|
||||
|
||||
GLint err = glGetError();
|
||||
assert(err==GL_NO_ERROR);
|
||||
}
|
||||
|
||||
void GlutRenderer::displayCallback()
|
||||
{
|
||||
updateScene();
|
||||
|
||||
renderScene();
|
||||
}
|
||||
|
||||
// Initialise GLUT with the program arguments and make this object the target
// of the GLUT callbacks.
// All data members now start from defined values (zero, with a Y-up camera)
// instead of being left uninitialised; the window size is set later by
// initGraphics/resize.
GlutRenderer::GlutRenderer(int argc, char* argv[])
	: m_glutScreenWidth(0),
	  m_glutScreenHeight(0),
	  m_cameraPosition(0,0,0),
	  m_cameraTargetPosition(0,0,0),
	  m_cameraDistance(0),
	  m_cameraUp(0,1,0),
	  m_azimuth(0.f),
	  m_elevation(0.f)
{
	glutInit(&argc, argv);
	gDemoApplication = this;
}
|
||||
|
||||
void GlutRenderer::initGraphics(int width, int height)
|
||||
{
|
||||
m_glutScreenWidth = width;
|
||||
m_glutScreenHeight = height;
|
||||
|
||||
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
|
||||
|
||||
glutInitWindowSize(m_glutScreenWidth, m_glutScreenHeight);
|
||||
glutCreateWindow("GPU rigid body pipeline2");
|
||||
glutKeyboardFunc(glutKeyboardCallback);
|
||||
glutKeyboardUpFunc(glutKeyboardUpCallback);
|
||||
glutSpecialFunc(glutSpecialKeyboardCallback);
|
||||
glutSpecialUpFunc(glutSpecialKeyboardUpCallback);
|
||||
glutReshapeFunc(glutReshapeCallback);
|
||||
glutIdleFunc(glutIdleCallback);
|
||||
glutMouseFunc(glutMouseFuncCallback);
|
||||
glutPassiveMotionFunc(glutMotionFuncCallback);
|
||||
glutMotionFunc(glutMotionFuncCallback);
|
||||
glutDisplayFunc( glutDisplayCallback );
|
||||
|
||||
GLenum err = glewInit();
|
||||
if (GLEW_OK != err)
|
||||
{
|
||||
printf("Error: %s\n", glewGetErrorString(err));
|
||||
}
|
||||
|
||||
glClearColor(0.6f,0.6f,1.f,1.f);
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#ifndef GLUT_RENDERER_H
|
||||
#define GLUT_RENDERER_H
|
||||
|
||||
#include "btGlutInclude.h"
|
||||
#include "LinearMath/btVector3.h"
|
||||
|
||||
struct GlutRenderer
|
||||
{
|
||||
static GlutRenderer* gDemoApplication;
|
||||
int m_glutScreenWidth;
|
||||
int m_glutScreenHeight;
|
||||
|
||||
btVector3 m_cameraPosition;
|
||||
btVector3 m_cameraTargetPosition;
|
||||
btScalar m_cameraDistance;
|
||||
btVector3 m_cameraUp;
|
||||
float m_azimuth;
|
||||
float m_elevation;
|
||||
|
||||
|
||||
GlutRenderer(int argc, char* argv[]);
|
||||
|
||||
virtual void initGraphics(int width, int height);
|
||||
virtual void cleanup() {}
|
||||
|
||||
void runMainLoop();
|
||||
|
||||
virtual void updateScene(){};
|
||||
|
||||
virtual void renderScene();
|
||||
|
||||
virtual void keyboardCallback(unsigned char key, int x, int y) {};
|
||||
virtual void keyboardUpCallback(unsigned char key, int x, int y) {}
|
||||
virtual void specialKeyboard(int key, int x, int y){}
|
||||
virtual void specialKeyboardUp(int key, int x, int y){}
|
||||
virtual void resize(int w, int h);
|
||||
virtual void mouseFunc(int button, int state, int x, int y);
|
||||
virtual void mouseMotionFunc(int x,int y);
|
||||
virtual void displayCallback();
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif //GLUT_RENDERER_H
|
||||
@@ -0,0 +1,64 @@
|
||||
|
||||
-- Premake project for the GPU rigid body pipeline demo, NVIDIA OpenCL flavour.
-- The project is only generated when the NVIDIA OpenCL SDK is found.
hasCL = findOpenCL_NVIDIA()

if (hasCL) then

	project "OpenCL_gpu_rigidbody_pipeline2_NVIDIA"

	initOpenCL_NVIDIA()

	language "C++"

	kind "ConsoleApp"
	targetdir "../../../bin"

	initOpenGL()
	initGlew()

	includedirs {
		"../../primitives",
		"../../../bullet2"
	}

	files {
		-- demo application and renderers
		"../main.cpp",
		"../CLPhysicsDemo.cpp",
		"../CLPhysicsDemo.h",
		"../GLInstancingRenderer.cpp",
		"../GLInstancingRenderer.h",
		"../GlutRenderer.cpp",
		"../GlutRenderer.h",
		"../Win32OpenGLRenderManager.cpp",
		"../Win32OpenGLRenderManager.h",
		-- narrow phase, solver and convex utilities
		"../../gpu_rigidbody_pipeline/btConvexUtility.cpp",
		"../../gpu_rigidbody_pipeline/btConvexUtility.h",
		"../../gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.cpp",
		"../../gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h",
		"../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp",
		"../../../dynamics/basic_demo/ConvexHeightFieldShape.h",
		"../../../bullet2/LinearMath/btConvexHullComputer.cpp",
		"../../../bullet2/LinearMath/btConvexHullComputer.h",
		-- broadphase
		"../../broadphase_benchmark/findPairsOpenCL.cpp",
		"../../broadphase_benchmark/findPairsOpenCL.h",
		"../../broadphase_benchmark/btGridBroadphaseCL.cpp",
		"../../broadphase_benchmark/btGridBroadphaseCL.h",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		-- Bullet 2 sources compiled directly into the demo
		"../../../bullet2/LinearMath/btAlignedAllocator.cpp",
		"../../../bullet2/LinearMath/btQuickprof.cpp",
		"../../../bullet2/LinearMath/btQuickprof.h",
		"../../../bullet2/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../bullet2/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../bullet2/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		-- OpenCL setup and OpenGL interop helpers
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
|
||||
#ifndef __OPENGL_INCLUDE_H
|
||||
#define __OPENGL_INCLUDE_H
|
||||
|
||||
#include <GL/glew.h>
|
||||
|
||||
//think different
|
||||
#if defined(__APPLE__) && !defined (VMDMESA)
|
||||
#include <OpenGL/OpenGL.h>
|
||||
#include <OpenGL/gl.h>
|
||||
#include <OpenGL/glu.h>
|
||||
#else
|
||||
|
||||
|
||||
#ifdef _WINDOWS
|
||||
#include <windows.h>
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glu.h>
|
||||
#else
|
||||
#include <GL/gl.h>
|
||||
|
||||
#endif //_WINDOWS
|
||||
#endif //APPLE
|
||||
|
||||
#endif //__OPENGL_INCLUDE_H
|
||||
|
||||
@@ -0,0 +1,210 @@
|
||||
#ifndef SHAPE_DATA_H
|
||||
#define SHAPE_DATA_H
|
||||
|
||||
/// Interleaved vertex data for the barrel mesh: 57 vertices, 9 floats each.
/// Per-vertex layout: position xyz, w (always 1.0 here), normal xyz, uv (always 0.5,0.5 here).
/// Declared const, like the cube tables in this header, because the data is only ever read.
static const float barrel_vertices[] = {
	// 0-11: cap at y = -0.5, normal (0,-1,0)
	0.0f,-0.5f,0.0f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	0.282362f,-0.5f,-0.205148f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	0.349018f,-0.5f,0.0f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	0.107853f,-0.5f,-0.331936f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	-0.107853f,-0.5f,-0.331936f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	0.107853f,-0.5f,-0.331936f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	-0.282362f,-0.5f,-0.205148f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	-0.349018f,-0.5f,0.0f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	-0.282362f,-0.5f,0.205148f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	-0.107853f,-0.5f,0.331936f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	0.107853f,-0.5f,0.331936f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,
	0.282362f,-0.5f,0.205148f, 1.0f, 0.0f,-1.0f,0.0f, 0.5f, 0.5f,

	// 12-23: cap at y = +0.5, normal (0,1,0)
	0.0f,0.5f,0.0f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	0.349018f,0.5f,0.0f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	0.282362f,0.5f,-0.205148f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	0.107853f,0.5f,-0.331936f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	0.107853f,0.5f,-0.331936f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	-0.107853f,0.5f,-0.331936f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	-0.282362f,0.5f,-0.205148f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	-0.349018f,0.5f,0.0f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	-0.282362f,0.5f,0.205148f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	-0.107853f,0.5f,0.331936f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	0.107853f,0.5f,0.331936f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,
	0.282362f,0.5f,0.205148f, 1.0f, 0.0f,1.0f,0.0f, 0.5f, 0.5f,

	// 24-45: side vertices on the y = -0.5 and y = 0 rings, outward-facing normals
	0.349018f,-0.5f,0.0f, 1.0f, 0.957307f,-0.289072f,0.0f, 0.5f, 0.5f,
	0.404509f,0.0f,-0.293893f, 1.0f, 0.809017f,0.0f,-0.587785f, 0.5f, 0.5f,
	0.5f,0.0f,0.0f, 1.0f, 1.0f,0.0f,0.0f, 0.5f, 0.5f,
	0.282362f,-0.5f,-0.205148f, 1.0f, 0.774478f,-0.289072f,-0.562691f, 0.5f, 0.5f,
	0.154508f,0.0f,-0.475528f, 1.0f, 0.309017f,0.0f,-0.951057f, 0.5f, 0.5f,
	0.107853f,-0.5f,-0.331936f, 1.0f, 0.295824f,-0.289072f,-0.910453f, 0.5f, 0.5f,
	0.107853f,-0.5f,-0.331936f, 1.0f, 0.295824f,-0.289072f,-0.910453f, 0.5f, 0.5f,
	-0.154509f,0.0f,-0.475528f, 1.0f, -0.309017f,0.0f,-0.951057f, 0.5f, 0.5f,
	0.154508f,0.0f,-0.475528f, 1.0f, 0.309017f,0.0f,-0.951057f, 0.5f, 0.5f,
	-0.107853f,-0.5f,-0.331936f, 1.0f, -0.295824f,-0.289072f,-0.910453f, 0.5f, 0.5f,
	-0.404509f,0.0f,-0.293893f, 1.0f, -0.809017f,0.0f,-0.587785f, 0.5f, 0.5f,
	-0.282362f,-0.5f,-0.205148f, 1.0f, -0.774478f,-0.289072f,-0.562691f, 0.5f, 0.5f,
	-0.5f,0.0f,0.0f, 1.0f, -1.0f,0.0f,0.0f, 0.5f, 0.5f,
	-0.349018f,-0.5f,0.0f, 1.0f, -0.957307f,-0.289072f,0.0f, 0.5f, 0.5f,
	-0.404508f,0.0f,0.293893f, 1.0f, -0.809017f,0.0f,0.587785f, 0.5f, 0.5f,
	-0.282362f,-0.5f,0.205148f, 1.0f, -0.774478f,-0.289072f,0.562691f, 0.5f, 0.5f,
	-0.154509f,0.0f,0.475528f, 1.0f, -0.309017f,0.0f,0.951056f, 0.5f, 0.5f,
	-0.107853f,-0.5f,0.331936f, 1.0f, -0.295824f,-0.289072f,0.910453f, 0.5f, 0.5f,
	0.154509f,0.0f,0.475528f, 1.0f, 0.309017f,0.0f,0.951056f, 0.5f, 0.5f,
	0.107853f,-0.5f,0.331936f, 1.0f, 0.295824f,-0.289072f,0.910453f, 0.5f, 0.5f,
	0.404509f,0.0f,0.293892f, 1.0f, 0.809017f,0.0f,0.587785f, 0.5f, 0.5f,
	0.282362f,-0.5f,0.205148f, 1.0f, 0.774478f,-0.289072f,0.562691f, 0.5f, 0.5f,

	// 46-56: side vertices on the y = +0.5 ring, outward-facing normals
	0.282362f,0.5f,-0.205148f, 1.0f, 0.774478f,0.289072f,-0.562691f, 0.5f, 0.5f,
	0.349018f,0.5f,0.0f, 1.0f, 0.957307f,0.289072f,0.0f, 0.5f, 0.5f,
	0.107853f,0.5f,-0.331936f, 1.0f, 0.295824f,0.289072f,-0.910453f, 0.5f, 0.5f,
	-0.107853f,0.5f,-0.331936f, 1.0f, -0.295824f,0.289072f,-0.910453f, 0.5f, 0.5f,
	0.107853f,0.5f,-0.331936f, 1.0f, 0.295824f,0.289072f,-0.910453f, 0.5f, 0.5f,
	-0.282362f,0.5f,-0.205148f, 1.0f, -0.774478f,0.289072f,-0.562691f, 0.5f, 0.5f,
	-0.349018f,0.5f,0.0f, 1.0f, -0.957307f,0.289072f,0.0f, 0.5f, 0.5f,
	-0.282362f,0.5f,0.205148f, 1.0f, -0.774478f,0.289072f,0.562691f, 0.5f, 0.5f,
	-0.107853f,0.5f,0.331936f, 1.0f, -0.295824f,0.289072f,0.910453f, 0.5f, 0.5f,
	0.107853f,0.5f,0.331936f, 1.0f, 0.295824f,0.289072f,0.910453f, 0.5f, 0.5f,
	0.282362f,0.5f,0.205148f, 1.0f, 0.774478f,0.289072f,0.562691f, 0.5f, 0.5f
};
|
||||
|
||||
|
||||
|
||||
/// Triangle list for barrel_vertices: 60 triangles, three vertex indices each.
/// Declared const, like cube_indices in this header, because the data is only ever read.
static const int barrel_indices[] = {
	// fan over the y = -0.5 cap (centre vertex 0)
	0,1,2,    0,3,1,    0,4,5,    0,6,4,    0,7,6,
	0,8,7,    0,9,8,    0,10,9,   0,11,10,  0,2,11,

	// fan over the y = +0.5 cap (centre vertex 12)
	12,13,14, 12,14,15, 12,16,17, 12,17,18, 12,18,19,
	12,19,20, 12,20,21, 12,21,22, 12,22,23, 12,23,13,

	// side band between the y = -0.5 ring and the y = 0 ring
	24,25,26, 24,27,25, 27,28,25, 27,29,28,
	30,31,32, 30,33,31, 33,34,31, 33,35,34,
	35,36,34, 35,37,36, 37,38,36, 37,39,38,
	39,40,38, 39,41,40, 41,42,40, 41,43,42,
	43,44,42, 43,45,44, 45,26,44, 45,24,26,

	// side band between the y = 0 ring and the y = +0.5 ring
	26,46,47, 26,25,46, 25,48,46, 25,28,48,
	32,49,50, 32,31,49, 31,51,49, 31,34,51,
	34,52,51, 34,36,52, 36,53,52, 36,38,53,
	38,54,53, 38,40,54, 40,55,54, 40,42,55,
	42,56,55, 42,44,56, 44,47,56, 44,26,47,
};
|
||||
|
||||
|
||||
/// Unit cube, 4 vertices per face, 9 floats per vertex:
/// position xyz, unused w, normal xyz, uv.
static const float cube_vertices[] =
{
	// face with normal (0,0,1): vertices 0-3
	-0.5f, -0.5f,  0.5f,  0.0f,   0, 0, 1,   0, 0,
	 0.5f, -0.5f,  0.5f,  0.0f,   0, 0, 1,   1, 0,
	 0.5f,  0.5f,  0.5f,  0.0f,   0, 0, 1,   1, 1,
	-0.5f,  0.5f,  0.5f,  0.0f,   0, 0, 1,   0, 1,

	// face with normal (0,0,-1): vertices 4-7
	-0.5f, -0.5f, -0.5f,  0.5f,   0, 0,-1,   0, 0,
	 0.5f, -0.5f, -0.5f,  0.5f,   0, 0,-1,   1, 0,
	 0.5f,  0.5f, -0.5f,  0.5f,   0, 0,-1,   1, 1,
	-0.5f,  0.5f, -0.5f,  0.5f,   0, 0,-1,   0, 1,

	// face with normal (-1,0,0): vertices 8-11
	-0.5f, -0.5f, -0.5f,  0.5f,  -1, 0, 0,   0, 0,
	-0.5f,  0.5f, -0.5f,  0.5f,  -1, 0, 0,   1, 0,
	-0.5f,  0.5f,  0.5f,  0.5f,  -1, 0, 0,   1, 1,
	-0.5f, -0.5f,  0.5f,  0.5f,  -1, 0, 0,   0, 1,

	// face with normal (1,0,0): vertices 12-15
	 0.5f, -0.5f, -0.5f,  0.5f,   1, 0, 0,   0, 0,
	 0.5f,  0.5f, -0.5f,  0.5f,   1, 0, 0,   1, 0,
	 0.5f,  0.5f,  0.5f,  0.5f,   1, 0, 0,   1, 1,
	 0.5f, -0.5f,  0.5f,  0.5f,   1, 0, 0,   0, 1,

	// face with normal (0,-1,0): vertices 16-19
	-0.5f, -0.5f, -0.5f,  0.5f,   0,-1, 0,   0, 0,
	-0.5f, -0.5f,  0.5f,  0.5f,   0,-1, 0,   1, 0,
	 0.5f, -0.5f,  0.5f,  0.5f,   0,-1, 0,   1, 1,
	 0.5f, -0.5f, -0.5f,  0.5f,   0,-1, 0,   0, 1,

	// face with normal (0,1,0): vertices 20-23
	-0.5f,  0.5f, -0.5f,  0.5f,   0, 1, 0,   0, 0,
	-0.5f,  0.5f,  0.5f,  0.5f,   0, 1, 0,   1, 0,
	 0.5f,  0.5f,  0.5f,  0.5f,   0, 1, 0,   1, 1,
	 0.5f,  0.5f, -0.5f,  0.5f,   0, 1, 0,   0, 1,
};
|
||||
|
||||
|
||||
/// Box spanning x in [-1.5,1.5], y and z in [-0.5,0.5]; 4 vertices per face,
/// 9 floats per vertex: position xyz, unused w, normal xyz, uv.
static const float cube_vertices2[] =
{
	// face with normal (0,0,1): vertices 0-3
	-1.5f, -0.5f,  0.5f,  0.0f,   0, 0, 1,   0, 0,
	 1.5f, -0.5f,  0.5f,  0.0f,   0, 0, 1,   1, 0,
	 1.5f,  0.5f,  0.5f,  0.0f,   0, 0, 1,   1, 1,
	-1.5f,  0.5f,  0.5f,  0.0f,   0, 0, 1,   0, 1,

	// face with normal (0,0,-1): vertices 4-7
	-1.5f, -0.5f, -0.5f,  0.5f,   0, 0,-1,   0, 0,
	 1.5f, -0.5f, -0.5f,  0.5f,   0, 0,-1,   1, 0,
	 1.5f,  0.5f, -0.5f,  0.5f,   0, 0,-1,   1, 1,
	-1.5f,  0.5f, -0.5f,  0.5f,   0, 0,-1,   0, 1,

	// face with normal (-1,0,0): vertices 8-11
	-1.5f, -0.5f, -0.5f,  0.5f,  -1, 0, 0,   0, 0,
	-1.5f,  0.5f, -0.5f,  0.5f,  -1, 0, 0,   1, 0,
	-1.5f,  0.5f,  0.5f,  0.5f,  -1, 0, 0,   1, 1,
	-1.5f, -0.5f,  0.5f,  0.5f,  -1, 0, 0,   0, 1,

	// face with normal (1,0,0): vertices 12-15
	 1.5f, -0.5f, -0.5f,  0.5f,   1, 0, 0,   0, 0,
	 1.5f,  0.5f, -0.5f,  0.5f,   1, 0, 0,   1, 0,
	 1.5f,  0.5f,  0.5f,  0.5f,   1, 0, 0,   1, 1,
	 1.5f, -0.5f,  0.5f,  0.5f,   1, 0, 0,   0, 1,

	// face with normal (0,-1,0): vertices 16-19
	-1.5f, -0.5f, -0.5f,  0.5f,   0,-1, 0,   0, 0,
	-1.5f, -0.5f,  0.5f,  0.5f,   0,-1, 0,   1, 0,
	 1.5f, -0.5f,  0.5f,  0.5f,   0,-1, 0,   1, 1,
	 1.5f, -0.5f, -0.5f,  0.5f,   0,-1, 0,   0, 1,

	// face with normal (0,1,0): vertices 20-23
	-1.5f,  0.5f, -0.5f,  0.5f,   0, 1, 0,   0, 0,
	-1.5f,  0.5f,  0.5f,  0.5f,   0, 1, 0,   1, 0,
	 1.5f,  0.5f,  0.5f,  0.5f,   0, 1, 0,   1, 1,
	 1.5f,  0.5f, -0.5f,  0.5f,   0, 1, 0,   0, 1,
};
|
||||
|
||||
|
||||
/// Two triangles per cube face (36 indices), indexing the 24-vertex cube tables above.
static const int cube_indices[]=
{
	 0, 1, 2,    0, 2, 3,	// vertices 0-3   (normal 0,0,1)
	 4, 5, 6,    4, 6, 7,	// vertices 4-7   (normal 0,0,-1)
	 8, 9,10,    8,10,11,	// vertices 8-11  (normal -1,0,0)
	12,13,14,   12,14,15,	// vertices 12-15 (normal 1,0,0)
	16,17,18,   16,18,19,	// vertices 16-19 (normal 0,-1,0)
	20,21,22,   20,22,23	// vertices 20-23 (normal 0,1,0)
};
|
||||
|
||||
#endif //SHAPE_DATA_H
|
||||
@@ -0,0 +1,465 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
|
||||
#include "Win32OpenGLRenderManager.h"
|
||||
|
||||
#include <windows.h>
|
||||
#include <GL/gl.h>
|
||||
|
||||
static InternalData2* sData = 0;
|
||||
|
||||
struct InternalData2
|
||||
{
|
||||
HWND m_hWnd;;
|
||||
int m_width;
|
||||
int m_height;
|
||||
HDC m_hDC;
|
||||
HGLRC m_hRC;
|
||||
bool m_OpenGLInitialized;
|
||||
int m_oldScreenWidth;
|
||||
int m_oldHeight;
|
||||
int m_oldBitsPerPel;
|
||||
bool m_quit;
|
||||
|
||||
|
||||
InternalData2()
|
||||
{
|
||||
m_hWnd = 0;
|
||||
m_width = 0;
|
||||
m_height = 0;
|
||||
m_hDC = 0;
|
||||
m_hRC = 0;
|
||||
m_OpenGLInitialized = false;
|
||||
m_oldScreenWidth = 0;
|
||||
m_oldHeight = 0;
|
||||
m_oldBitsPerPel = 0;
|
||||
m_quit = false;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void Win32OpenGLWindow::enableOpenGL()
|
||||
{
|
||||
|
||||
|
||||
|
||||
PIXELFORMATDESCRIPTOR pfd;
|
||||
int format;
|
||||
|
||||
// get the device context (DC)
|
||||
m_data->m_hDC = GetDC( m_data->m_hWnd );
|
||||
|
||||
// set the pixel format for the DC
|
||||
ZeroMemory( &pfd, sizeof( pfd ) );
|
||||
pfd.nSize = sizeof( pfd );
|
||||
pfd.nVersion = 1;
|
||||
pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
|
||||
pfd.iPixelType = PFD_TYPE_RGBA;
|
||||
pfd.cColorBits = 24;
|
||||
pfd.cDepthBits = 16;
|
||||
pfd.cStencilBits = 1;
|
||||
pfd.iLayerType = PFD_MAIN_PLANE;
|
||||
format = ChoosePixelFormat( m_data->m_hDC, &pfd );
|
||||
SetPixelFormat( m_data->m_hDC, format, &pfd );
|
||||
|
||||
// create and enable the render context (RC)
|
||||
m_data->m_hRC = wglCreateContext( m_data->m_hDC );
|
||||
wglMakeCurrent( m_data->m_hDC, m_data->m_hRC );
|
||||
m_data->m_OpenGLInitialized = true;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void Win32OpenGLWindow::disableOpenGL()
|
||||
{
|
||||
m_data->m_OpenGLInitialized = false;
|
||||
|
||||
wglMakeCurrent( NULL, NULL );
|
||||
wglDeleteContext( m_data->m_hRC );
|
||||
ReleaseDC( m_data->m_hWnd, m_data->m_hDC );
|
||||
}
|
||||
|
||||
void Win32OpenGLWindow::pumpMessage()
|
||||
{
|
||||
MSG msg;
|
||||
// check for messages
|
||||
if ( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) )
|
||||
{
|
||||
|
||||
// handle or dispatch messages
|
||||
if ( msg.message == WM_QUIT )
|
||||
{
|
||||
m_data->m_quit = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
TranslateMessage( &msg );
|
||||
DispatchMessage( &msg );
|
||||
}
|
||||
|
||||
// gDemoApplication->displayCallback();
|
||||
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Window procedure for the demo window class.
//  - WM_PAINT / WM_ERASEBKGND: validated and swallowed.
//  - WM_DESTROY, 'Q', Escape: post WM_QUIT.
//  - WM_SIZE (maximized/restored): record the new client size in sData and
//    update the GL viewport once a context exists.
// Everything else is forwarded to DefWindowProc.
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
	switch (message)
	{
	case WM_PAINT:
		{
			PAINTSTRUCT ps;
			BeginPaint(hWnd, &ps);
			EndPaint(hWnd, &ps);
		}
		return 0;

	case WM_ERASEBKGND:
		return 0;

	case WM_DESTROY:
		PostQuitMessage(0);
		return 0;

	case WM_KEYDOWN:
		if (wParam == 'Q' || wParam == VK_ESCAPE)
		{
			PostQuitMessage(0);
			return 0;
		}
		break;

	case WM_SIZE:
		switch (wParam)
		{
		case SIZE_MINIMIZED:
			return 0;

		case SIZE_MAXIMIZED:
		case SIZE_RESTORED:
			// sData is null when no Win32OpenGLWindow instance exists
			if (sData)
			{
				sData->m_width = LOWORD (lParam);
				sData->m_height = HIWORD (lParam);

				// size messages can arrive before the GL context has been
				// created; only touch GL state once it is current
				if (sData->m_OpenGLInitialized)
				{
					glViewport(0, 0, sData->m_width, sData->m_height);
				}
			}
			return 0;

		default:
			break;
		}
		break;

	default:
		break;
	}

	return DefWindowProc(hWnd, message, wParam, lParam);
}
|
||||
|
||||
|
||||
|
||||
|
||||
void Win32OpenGLWindow::init(int width,int height, bool fullscreen,int colorBitsPerPixel, void* windowHandle)
|
||||
{
|
||||
// get handle to exe file
|
||||
HINSTANCE hInstance = GetModuleHandle(0);
|
||||
|
||||
// create the window if we need to and we do not use the null device
|
||||
if (!windowHandle)
|
||||
{
|
||||
const char* ClassName = "DeviceWin32";
|
||||
|
||||
// Register Class
|
||||
WNDCLASSEX wcex;
|
||||
wcex.cbSize = sizeof(WNDCLASSEX);
|
||||
wcex.style = CS_HREDRAW | CS_VREDRAW;
|
||||
wcex.lpfnWndProc = WndProc;
|
||||
wcex.cbClsExtra = 0;
|
||||
wcex.cbWndExtra = 0;
|
||||
wcex.hInstance = hInstance;
|
||||
wcex.hIcon = LoadIcon( NULL, IDI_APPLICATION ); //(HICON)LoadImage(hInstance, "bullet_ico.ico", IMAGE_ICON, 0,0, LR_LOADTRANSPARENT);//LR_LOADFROMFILE);
|
||||
wcex.hCursor = LoadCursor(NULL, IDC_ARROW);
|
||||
wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
|
||||
wcex.lpszMenuName = 0;
|
||||
wcex.lpszClassName = ClassName;
|
||||
wcex.hIconSm = 0;
|
||||
|
||||
// if there is an icon, load it
|
||||
wcex.hIcon = (HICON)LoadImage(hInstance, "irrlicht.ico", IMAGE_ICON, 0,0, LR_LOADFROMFILE);
|
||||
|
||||
RegisterClassEx(&wcex);
|
||||
|
||||
// calculate client size
|
||||
|
||||
RECT clientSize;
|
||||
clientSize.top = 0;
|
||||
clientSize.left = 0;
|
||||
clientSize.right = width;
|
||||
clientSize.bottom = height;
|
||||
|
||||
DWORD style = WS_POPUP;
|
||||
|
||||
if (!fullscreen)
|
||||
style = WS_SYSMENU | WS_BORDER | WS_CAPTION | WS_CLIPCHILDREN | WS_CLIPSIBLINGS | WS_MINIMIZEBOX | WS_MAXIMIZEBOX | WS_SIZEBOX;
|
||||
|
||||
AdjustWindowRect(&clientSize, style, FALSE);
|
||||
|
||||
m_data->m_width = clientSize.right - clientSize.left;
|
||||
m_data->m_height = clientSize.bottom - clientSize.top;
|
||||
|
||||
int windowLeft = (GetSystemMetrics(SM_CXSCREEN) - m_data->m_width) / 2;
|
||||
int windowTop = (GetSystemMetrics(SM_CYSCREEN) - m_data->m_height) / 2;
|
||||
|
||||
if (fullscreen)
|
||||
{
|
||||
windowLeft = 0;
|
||||
windowTop = 0;
|
||||
}
|
||||
|
||||
// create window
|
||||
|
||||
m_data->m_hWnd = CreateWindow( ClassName, "", style, windowLeft, windowTop,
|
||||
m_data->m_width, m_data->m_height, NULL, NULL, hInstance, NULL);
|
||||
|
||||
ShowWindow(m_data->m_hWnd, SW_SHOW);
|
||||
UpdateWindow(m_data->m_hWnd);
|
||||
|
||||
MoveWindow(m_data->m_hWnd, windowLeft, windowTop, m_data->m_width, m_data->m_height, TRUE);
|
||||
}
|
||||
else if (windowHandle)
|
||||
{
|
||||
// attach external window
|
||||
m_data->m_hWnd = static_cast<HWND>(windowHandle);
|
||||
RECT r;
|
||||
GetWindowRect(m_data->m_hWnd, &r);
|
||||
m_data->m_width = r.right - r.left;
|
||||
m_data->m_height = r.bottom - r.top;
|
||||
//sFullScreen = false;
|
||||
//sExternalWindow = true;
|
||||
}
|
||||
|
||||
|
||||
if (fullscreen)
|
||||
{
|
||||
DEVMODE dm;
|
||||
memset(&dm, 0, sizeof(dm));
|
||||
dm.dmSize = sizeof(dm);
|
||||
// use default values from current setting
|
||||
EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dm);
|
||||
m_data->m_oldScreenWidth = dm.dmPelsWidth;
|
||||
m_data->m_oldHeight = dm.dmPelsHeight;
|
||||
m_data->m_oldBitsPerPel = dm.dmBitsPerPel;
|
||||
|
||||
dm.dmPelsWidth = width;
|
||||
dm.dmPelsHeight = height;
|
||||
if (colorBitsPerPixel)
|
||||
{
|
||||
dm.dmBitsPerPel = colorBitsPerPixel;
|
||||
}
|
||||
dm.dmFields = DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT | DM_DISPLAYFREQUENCY;
|
||||
|
||||
LONG res = ChangeDisplaySettings(&dm, CDS_FULLSCREEN);
|
||||
if (res != DISP_CHANGE_SUCCESSFUL)
|
||||
{ // try again without forcing display frequency
|
||||
dm.dmFields = DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT;
|
||||
res = ChangeDisplaySettings(&dm, CDS_FULLSCREEN);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//VideoDriver = video::createOpenGLDriver(CreationParams, FileSystem, this);
|
||||
enableOpenGL();
|
||||
|
||||
|
||||
const wchar_t* text= L"OpenCL rigid body demo";
|
||||
|
||||
DWORD dwResult;
|
||||
|
||||
#ifdef _WIN64
|
||||
SetWindowTextW(m_data->m_hWnd, text);
|
||||
#else
|
||||
SendMessageTimeoutW(m_data->m_hWnd, WM_SETTEXT, 0,
|
||||
reinterpret_cast<LPARAM>(text),
|
||||
SMTO_ABORTIFHUNG, 2000, &dwResult);
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void Win32OpenGLWindow::switchFullScreen(bool fullscreen,int width,int height,int colorBitsPerPixel)
|
||||
{
|
||||
LONG res;
|
||||
DEVMODE dm;
|
||||
memset(&dm, 0, sizeof(dm));
|
||||
dm.dmSize = sizeof(dm);
|
||||
// use default values from current setting
|
||||
EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dm);
|
||||
|
||||
dm.dmFields = DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT | DM_DISPLAYFREQUENCY;
|
||||
|
||||
if (fullscreen && !m_data->m_oldScreenWidth)
|
||||
{
|
||||
m_data->m_oldScreenWidth = dm.dmPelsWidth;
|
||||
m_data->m_oldHeight = dm.dmPelsHeight;
|
||||
m_data->m_oldBitsPerPel = dm.dmBitsPerPel;
|
||||
|
||||
if (width && height)
|
||||
{
|
||||
dm.dmPelsWidth = width;
|
||||
dm.dmPelsHeight = height;
|
||||
} else
|
||||
{
|
||||
dm.dmPelsWidth = m_data->m_width;
|
||||
dm.dmPelsHeight = m_data->m_height;
|
||||
}
|
||||
if (colorBitsPerPixel)
|
||||
{
|
||||
dm.dmBitsPerPel = colorBitsPerPixel;
|
||||
}
|
||||
} else
|
||||
{
|
||||
if (m_data->m_oldScreenWidth)
|
||||
{
|
||||
dm.dmPelsWidth = m_data->m_oldScreenWidth;
|
||||
dm.dmPelsHeight= m_data->m_oldHeight;
|
||||
dm.dmBitsPerPel = m_data->m_oldBitsPerPel;
|
||||
}
|
||||
}
|
||||
|
||||
if (fullscreen)
|
||||
{
|
||||
res = ChangeDisplaySettings(&dm, CDS_FULLSCREEN);
|
||||
} else
|
||||
{
|
||||
res = ChangeDisplaySettings(&dm, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Allocates the private state and publishes it through the file-level sData
// pointer, which is how WndProc reaches the window's data.
Win32OpenGLWindow::Win32OpenGLWindow()
	:m_data(new InternalData2())
{
	sData = m_data;
}
|
||||
|
||||
// Frees the private state. The file-level sData pointer is cleared only when
// it refers to this instance's data, so destroying one window does not pull
// the state out from under a window that was constructed later.
Win32OpenGLWindow::~Win32OpenGLWindow()
{
	if (sData == m_data)
	{
		sData = 0;
	}
	delete m_data;
	m_data = 0;
}
|
||||
|
||||
void Win32OpenGLWindow::init()
|
||||
{
|
||||
init(640,480,false);
|
||||
}
|
||||
|
||||
|
||||
void Win32OpenGLWindow::exit()
|
||||
{
|
||||
disableOpenGL();
|
||||
DestroyWindow(this->m_data->m_hWnd);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void Win32OpenGLWindow::startRendering()
|
||||
{
|
||||
pumpMessage();
|
||||
|
||||
//glClearColor(1.f,0.f,0.f,1.f);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); //clear buffers
|
||||
|
||||
//glCullFace(GL_BACK);
|
||||
//glFrontFace(GL_CCW);
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
|
||||
|
||||
float aspect;
|
||||
//btVector3 extents;
|
||||
|
||||
if (m_data->m_width > m_data->m_height)
|
||||
{
|
||||
aspect = (float)m_data->m_width / (float)m_data->m_height;
|
||||
//extents.setValue(aspect * 1.0f, 1.0f,0);
|
||||
} else
|
||||
{
|
||||
aspect = (float)m_data->m_height / (float)m_data->m_width;
|
||||
//extents.setValue(1.0f, aspect*1.f,0);
|
||||
}
|
||||
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glLoadIdentity();
|
||||
|
||||
if (m_data->m_width > m_data->m_height)
|
||||
{
|
||||
glFrustum (-aspect, aspect, -1.0, 1.0, 1.0, 10000.0);
|
||||
} else
|
||||
{
|
||||
glFrustum (-1.0, 1.0, -aspect, aspect, 1.0, 10000.0);
|
||||
}
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glLoadIdentity();
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Intentionally empty: this class draws nothing itself.
void Win32OpenGLWindow::renderAllObjects()
{
}
|
||||
|
||||
void Win32OpenGLWindow::endRendering()
|
||||
{
|
||||
SwapBuffers( m_data->m_hDC );
|
||||
}
|
||||
|
||||
// Placeholder: no timer is implemented, so this always reports zero.
float Win32OpenGLWindow::getTimeInSeconds()
{
	const float noTimerAvailable = 0.f;
	return noTimerAvailable;
}
|
||||
|
||||
// Intentionally empty: the arguments are accepted and ignored.
void Win32OpenGLWindow::setDebugMessage(int x,int y,const char* message)
{
	(void)x;
	(void)y;
	(void)message;
}
|
||||
|
||||
bool Win32OpenGLWindow::requestedExit()
|
||||
{
|
||||
return m_data->m_quit;
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
|
||||
#ifndef _WIN32_OPENGL_RENDER_MANAGER_H
|
||||
#define _WIN32_OPENGL_RENDER_MANAGER_H
|
||||
|
||||
|
||||
#define RM_DECLARE_HANDLE(name) typedef struct name##__ { int unused; } *name
|
||||
|
||||
RM_DECLARE_HANDLE(RenderObjectHandle);
|
||||
|
||||
struct InternalData2;
|
||||
|
||||
/// Minimal Win32 window with an OpenGL context.
/// Typical use: init(), then per frame startRendering() ... endRendering()
/// until requestedExit() returns true, then exit().
class Win32OpenGLWindow
{
	protected:

		/// private state, allocated in the constructor and freed in the destructor
		struct InternalData2*	m_data;

		/// create the GL context for the window and make it current
		void enableOpenGL();

		/// destroy the GL context and release the window DC
		void disableOpenGL();

		/// process pending window messages without blocking
		void pumpMessage();

	public:

		Win32OpenGLWindow();

		virtual ~Win32OpenGLWindow();

		virtual	void	init(); //default implementation uses default settings for width/height/fullscreen

		/// create a window (or attach to windowHandle when it is non-zero) and set up OpenGL
		void	init(int width,int height, bool fullscreen=false, int colorBitsPerPixel=0, void* windowHandle=0);

		/// change the display mode; width/height of 0 use the current window size
		void	switchFullScreen(bool fullscreen,int width=0,int height=0,int colorBitsPerPixel=0);

		/// destroy the GL context and the window
		virtual	void	exit();

		/// pump messages, clear the buffers and set up the projection
		virtual	void	startRendering();

		virtual	void	renderAllObjects();

		/// swap buffers
		virtual	void	endRendering();

		virtual	float	getTimeInSeconds();

		virtual	void	setDebugMessage(int x,int y,const char* message);

		/// true once a quit message has been received
		virtual	bool	requestedExit();

	private:

		// m_data is owned through a raw pointer and deleted in the destructor,
		// so copying would free it twice. Declared but not defined.
		Win32OpenGLWindow(const Win32OpenGLWindow&);
		Win32OpenGLWindow& operator=(const Win32OpenGLWindow&);

};
|
||||
|
||||
#endif //_WIN32_OPENGL_RENDER_MANAGER_H
|
||||
@@ -0,0 +1,224 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
//
|
||||
//#include "vld.h"
|
||||
#include <stddef.h>
#include <stdio.h>

#include <GL/glew.h>

#include "GLInstancingRenderer.h"

#include "GLInstancingRenderer.h"
#include "../opengl_interop/btOpenCLGLInteropBuffer.h"
#include "Win32OpenGLRenderManager.h"
#include "CLPhysicsDemo.h"
#include "../broadphase_benchmark/btGridBroadphaseCl.h"
#include "../../opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h"
#include "ShapeData.h"
#include "LinearMath/btQuickprof.h"
|
||||
|
||||
int NUM_OBJECTS_X = 32;
|
||||
int NUM_OBJECTS_Y = 24;
|
||||
int NUM_OBJECTS_Z = 32;
|
||||
|
||||
|
||||
float X_GAP = 2.f;
|
||||
float Y_GAP = 2.f;
|
||||
float Z_GAP = 2.f;
|
||||
|
||||
extern int numPairsOut;
|
||||
|
||||
|
||||
// Populates the renderer and the physics simulation with the demo scene:
//  1. a grid of barrels (the j == 0 layer has mass 0, the rest mass 1),
//  2. a grid of cubes with mass 1,
//  3. one physics instance registered with collision shape index -1, drawn
//     as a red, flattened and very wide cube,
// then uploads the bodies with writeBodiesToGpu().
// Each physics instance receives its running creation index as user pointer.
void createScene(GLInstancingRenderer& renderer,CLPhysicsDemo& physicsSim)
{
	const int strideInBytes = sizeof(float)*9;

	float position[4]={0,0,0,0};
	float orn[4] = {0,0,0,1};
	float color[4] = {1,1,1,1};
	int index=0;

#if 1
	{
		const int numBarrelVertices = sizeof(barrel_vertices)/strideInBytes;
		const int numBarrelIndices = sizeof(barrel_indices)/sizeof(int);
		const int barrelShapeIndex = renderer.registerShape(&barrel_vertices[0],numBarrelVertices,barrel_indices,numBarrelIndices);

		float barrelScaling[4] = {2,2,2,1};

		const int barrelCollisionShapeIndex = physicsSim.registerCollisionShape(&barrel_vertices[0],strideInBytes, numBarrelVertices,&barrelScaling[0]);

		for (int i=0;i<NUM_OBJECTS_X;i++)
		{
			for (int j=0;j<(NUM_OBJECTS_Y/2);j++)
			{
				for (int k=0;k<NUM_OBJECTS_Z;k++)
				{
					float mass = j? 1.f : 0.f;

					position[0]=(i*X_GAP-NUM_OBJECTS_X/2)+5;
					position[1]=(j*Y_GAP*2-NUM_OBJECTS_Y/2);
					position[2]=(k*Z_GAP-NUM_OBJECTS_Z/2)-NUM_OBJECTS_Z*3;
					position[3] = 1.f;

					renderer.registerGraphicsInstance(barrelShapeIndex,position,orn,color,barrelScaling);

					// widen through size_t so the int -> pointer conversion is
					// well-formed when pointers are wider than int
					void* ptr = reinterpret_cast<void*>(static_cast<size_t>(index));
					physicsSim.registerPhysicsInstance(mass,  position, orn, barrelCollisionShapeIndex,ptr);

					index++;
				}
			}
		}
	}
#endif

	float cubeScaling[4] = {2,2,2,1};
	const int numCubeVertices = sizeof(cube_vertices)/strideInBytes;
	const int numCubeIndices = sizeof(cube_indices)/sizeof(int);

	const int cubeCollisionShapeIndex = physicsSim.registerCollisionShape(&cube_vertices[0],strideInBytes, numCubeVertices,&cubeScaling[0]);
	const int cubeShapeIndex = renderer.registerShape(&cube_vertices[0],numCubeVertices,cube_indices,numCubeIndices);

	for (int i=0;i<NUM_OBJECTS_X;i++)
	{
		for (int j=0;j<NUM_OBJECTS_Y/2;j++)
		{
			for (int k=0;k<NUM_OBJECTS_Z;k++)
			{
				float mass = 1.f;

				// odd layers are shifted by one unit in x and z
				position[0]=(i*X_GAP-NUM_OBJECTS_X/2)+(j&1);
				position[1]=(j*Y_GAP-NUM_OBJECTS_Y/2);
				position[2]=(k*Z_GAP-NUM_OBJECTS_Z/2)+(j&1);
				position[3] = 1.f;

				renderer.registerGraphicsInstance(cubeShapeIndex,position,orn,color,cubeScaling);

				void* ptr = reinterpret_cast<void*>(static_cast<size_t>(index));
				physicsSim.registerPhysicsInstance(mass,  position, orn, cubeCollisionShapeIndex,ptr);

				index++;
			}
		}
	}

	if (1)
	{
		//add some 'special' plane shape
		void* ptr = reinterpret_cast<void*>(static_cast<size_t>(index));
		position[0] = 0.f;
		position[1] = -NUM_OBJECTS_Y/2-1;
		position[2] = 0.f;
		position[3] = 1.f;

		physicsSim.registerPhysicsInstance(0.f,position, orn, -1,ptr);

		// drawn as a red, flattened and very wide cube
		color[0] = 1.f;
		color[1] = 0.f;
		color[2] = 0.f;
		cubeScaling[0] = 5000.f;
		cubeScaling[1] = 0.01f;
		cubeScaling[2] = 5000.f;

		renderer.registerGraphicsInstance(cubeShapeIndex,position,orn,color,cubeScaling);
	}

	physicsSim.writeBodiesToGpu();
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
|
||||
Win32OpenGLWindow* window = new Win32OpenGLWindow();
|
||||
|
||||
window->init(1024,768);
|
||||
GLenum err = glewInit();
|
||||
window->startRendering();
|
||||
window->endRendering();
|
||||
|
||||
GLInstancingRenderer render;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
CLPhysicsDemo demo(window);
|
||||
|
||||
bool useInterop = true;
|
||||
demo.init(-1,-1,useInterop);
|
||||
|
||||
render.InitShaders();
|
||||
|
||||
if (useInterop)
|
||||
demo.setupInterop();
|
||||
|
||||
createScene(render, demo);
|
||||
|
||||
|
||||
printf("num objects = %d\n", NUM_OBJECTS_X*NUM_OBJECTS_Y*NUM_OBJECTS_Z);
|
||||
|
||||
|
||||
render.writeTransforms();
|
||||
|
||||
|
||||
while (!window->requestedExit())
|
||||
{
|
||||
CProfileManager::Reset();
|
||||
|
||||
demo.stepSimulation();
|
||||
|
||||
|
||||
window->startRendering();
|
||||
render.RenderScene();
|
||||
window->endRendering();
|
||||
|
||||
CProfileManager::Increment_Frame_Counter();
|
||||
|
||||
static bool printStats = true;
|
||||
|
||||
if (printStats)
|
||||
{
|
||||
static int count = 10;
|
||||
count--;
|
||||
if (count<0)
|
||||
{
|
||||
CProfileManager::dumpAll();
|
||||
//printf("total broadphase pairs= %d\n", gFpIO.m_numOverlap);
|
||||
printf("numPairsOut (culled) = %d\n", numPairsOut);
|
||||
printStats = false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
demo.cleanup();
|
||||
|
||||
render.CleanupShaders();
|
||||
window->exit();
|
||||
delete window;
|
||||
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
|
||||
-- Vendor-specific project scripts; only the AMD one is currently enabled.
include "AMD"
-- include "Intel"
-- include "NVIDIA"
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
|
||||
-- Premake project for the OpenCL integration demo, AMD OpenCL variant.
-- The project is only defined when findOpenCL_AMD() returns a true value.
hasCL = findOpenCL_AMD()

if hasCL then

	project "OpenCL_integration_AMD"

	initOpenCL_AMD()

	language "C++"
	kind "ConsoleApp"
	targetdir "../../../bin"

	-- OpenGL / GLUT / GLEW setup helpers
	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath",
		"../../primitives"
	}

	-- demo entry point plus the shared OpenCL and GL interop helpers
	files {
		"../main.cpp",
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,36 @@
|
||||
|
||||
-- Premake project for the OpenCL integration demo, Intel OpenCL variant.
-- The project is only defined when findOpenCL_Intel() returns a true value.
hasCL = findOpenCL_Intel()

if hasCL then

	project "OpenCL_integration_Intel"

	initOpenCL_Intel()

	language "C++"
	kind "ConsoleApp"
	targetdir "../../../bin"

	-- OpenGL / GLUT / GLEW setup helpers
	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath",
		"../../primitives"
	}

	-- demo entry point plus the shared OpenCL and GL interop helpers
	files {
		"../main.cpp",
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,35 @@
|
||||
|
||||
-- Premake project for the OpenCL integration demo, NVIDIA OpenCL variant.
-- The project is only defined when findOpenCL_NVIDIA() returns a true value.
hasCL = findOpenCL_NVIDIA()

if hasCL then

	project "OpenCL_integration_NVIDIA"

	initOpenCL_NVIDIA()

	language "C++"
	kind "ConsoleApp"
	targetdir "../../../bin"

	-- OpenGL / GLUT / GLEW setup helpers
	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath",
		"../../primitives"
	}

	-- demo entry point plus the shared OpenCL and GL interop helpers
	files {
		"../main.cpp",
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,73 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
// Quaternion product q1*q2; the vector part is in .xyz and the scalar part in .w
float4 quatMult(float4 q1, float4 q2)
{
	float4 prod;
	prod.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z;
	prod.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;
	prod.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;
	prod.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;
	return prod;
}
|
||||
|
||||
// Scales q to unit length; when the length is not positive the result is (0,0,0,1)
float4 quatNorm(float4 q)
{
	float len = native_sqrt(dot(q, q));
	if(!(len > 0.f))
	{
		q.x = 0.f;
		q.y = 0.f;
		q.z = 0.f;
		q.w = 1.f;
		return q;
	}
	q *= 1.f / len;
	return q;
}
|
||||
|
||||
|
||||
|
||||
// Advances one rigid body per work item by a fixed time step.
// Entry (nodeID + startOffset/4) of g_vertexBuffer is advanced by linVel*timeStep and
// entry (nodeID + startOffset/4 + numNodes) is treated as the orientation quaternion,
// which is rotated by the angular velocity and renormalized.
__kernel void
interopKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
	__global float4 *linVel,
	__global float4 *pAngVel)
{
	int nodeID = get_global_id(0);
	float timeStep = 0.0166666;

	float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254);

	if( nodeID >= numNodes )
	{
		return;
	}

	int posIndex = nodeID + startOffset/4;
	int ornIndex = nodeID + startOffset/4+numNodes;

	g_vertexBuffer[posIndex] += linVel[nodeID]*timeStep;

	float4 angvel = pAngVel[nodeID];
	float fAngle = native_sqrt(dot(angvel, angvel));

	// limit the angular motion per step
	if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)
	{
		fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;
	}

	float axisScale;
	if(fAngle < 0.001f)
	{
		// small angle: Taylor expansion of the sinc-like factor below
		axisScale = (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);
	}
	else
	{
		// sin(0.5*fAngle*timeStep)/fAngle
		axisScale = ( native_sin(0.5f * fAngle * timeStep) / fAngle);
	}

	// incremental rotation: vector part from the scaled angular velocity and scalar part from the half angle
	float4 dorn = angvel * axisScale;
	dorn.w = native_cos(fAngle * timeStep * 0.5f);

	float4 orn0 = g_vertexBuffer[ornIndex];
	float4 predictedOrn = quatNorm(quatMult(dorn, orn0));
	g_vertexBuffer[ornIndex]=predictedOrn;
}
|
||||
|
||||
);
|
||||
1106
Extras/RigidBodyGpuPipeline/opencl/integration/main.cpp
Normal file
1106
Extras/RigidBodyGpuPipeline/opencl/integration/main.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,5 @@
|
||||
|
||||
include "AMD"
|
||||
include "Intel"
|
||||
include "NVIDIA"
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
|
||||
-- Premake project for the OpenCL/OpenGL interop demo, AMD OpenCL variant.
-- The project is only defined when findOpenCL_AMD() returns a true value.
hasCL = findOpenCL_AMD()

if hasCL then

	project "OpenCL_GL_interop_AMD"

	initOpenCL_AMD()

	language "C++"
	kind "ConsoleApp"
	targetdir "../../../bin"

	-- OpenGL / GLUT / GLEW setup helpers
	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath"
	}

	-- demo entry point, shared OpenCL utilities and the local interop/stopwatch helpers
	files {
		"../main.cpp",
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../btOpenCLGLInteropBuffer.cpp",
		"../btOpenCLGLInteropBuffer.h",
		"../btStopwatch.cpp",
		"../btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,34 @@
|
||||
|
||||
-- Premake project for the OpenCL/OpenGL interop demo, Intel OpenCL variant.
-- The project is only defined when findOpenCL_Intel() returns a true value.
hasCL = findOpenCL_Intel()

if hasCL then

	project "OpenCL_GL_interop_Intel"

	initOpenCL_Intel()

	language "C++"
	kind "ConsoleApp"
	targetdir "../../../bin"

	-- OpenGL / GLUT / GLEW setup helpers
	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath"
	}

	-- demo entry point, shared OpenCL utilities and the local interop/stopwatch helpers
	files {
		"../main.cpp",
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../btOpenCLGLInteropBuffer.cpp",
		"../btOpenCLGLInteropBuffer.h",
		"../btStopwatch.cpp",
		"../btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,34 @@
|
||||
|
||||
-- Premake project for the OpenCL/OpenGL interop demo, NVIDIA OpenCL variant.
-- The project is only defined when findOpenCL_NVIDIA() returns a true value.
hasCL = findOpenCL_NVIDIA()

if hasCL then

	project "OpenCL_GL_interop_NVIDIA"

	initOpenCL_NVIDIA()

	language "C++"
	kind "ConsoleApp"
	targetdir "../../../bin"

	-- OpenGL / GLUT / GLEW setup helpers
	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath"
	}

	-- demo entry point, shared OpenCL utilities and the local interop/stopwatch helpers
	files {
		"../main.cpp",
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../btOpenCLGLInteropBuffer.cpp",
		"../btOpenCLGLInteropBuffer.h",
		"../btStopwatch.cpp",
		"../btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
///original author: Erwin Coumans
|
||||
|
||||
#include "btOpenCLGLInteropBuffer.h"
|
||||
|
||||
/// Stores the context, queue and VBO handle and creates a read/write OpenCL
/// memory object from the OpenGL buffer via clCreateFromGLBuffer.
btOpenCLGLInteropBuffer::btOpenCLGLInteropBuffer(cl_context clContext, cl_command_queue commandQueue,GLuint openGLVBO)
:m_clContext(clContext),
m_commandQueue(commandQueue),
m_openGLVBO(openGLVBO)
{
	cl_int status = CL_SUCCESS;
	m_buffer = clCreateFromGLBuffer(m_clContext, CL_MEM_READ_WRITE, m_openGLVBO, &status);
	oclCHECKERROR(status, CL_SUCCESS);
}
|
||||
|
||||
/// Releases the OpenCL memory object created in the constructor.
btOpenCLGLInteropBuffer::~btOpenCLGLInteropBuffer()
{
	// Capture the status of the release call; previously the checked variable was
	// never assigned, so the error check could not fail.
	cl_int ciErrNum = clReleaseMemObject (m_buffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
|
||||
|
||||
void btOpenCLGLInteropBuffer::copyCL2GL()
|
||||
{
|
||||
cl_int ciErrNum = CL_SUCCESS;
|
||||
ciErrNum = clEnqueueAcquireGLObjects(m_commandQueue, 1, &m_buffer, 0, 0, NULL);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
//do some stuff
|
||||
|
||||
|
||||
|
||||
|
||||
ciErrNum = clEnqueueReleaseGLObjects(m_commandQueue, 1, &m_buffer, 0, 0, 0);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
//only wait if necessary
|
||||
// clFinish(m_commandQueue);
|
||||
|
||||
}
|
||||
|
||||
/// Currently a no-op: the body is intentionally empty.
void btOpenCLGLInteropBuffer::copyGL2CL()
{
}
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
///original author: Erwin Coumans
|
||||
|
||||
#ifndef BT_OPENCL_GL_INTEROP_BUFFER_H
|
||||
#define BT_OPENCL_GL_INTEROP_BUFFER_H
|
||||
|
||||
#include "btGlutInclude.h"
|
||||
|
||||
#include "../basic_initialize/btOpenCLInclude.h"
|
||||
|
||||
class btOpenCLGLInteropBuffer
|
||||
{
|
||||
|
||||
cl_context m_clContext;
|
||||
cl_command_queue m_commandQueue;
|
||||
cl_mem m_buffer;
|
||||
GLuint m_openGLVBO;
|
||||
|
||||
public:
|
||||
|
||||
btOpenCLGLInteropBuffer(cl_context clContext, cl_command_queue commandQueue,GLuint openGLVBO);
|
||||
virtual ~btOpenCLGLInteropBuffer();
|
||||
|
||||
void copyCL2GL();
|
||||
|
||||
void copyGL2CL();
|
||||
|
||||
cl_mem getCLBUffer()
|
||||
{
|
||||
return m_buffer;
|
||||
}
|
||||
};
|
||||
|
||||
#endif //BT_OPENCL_GL_INTEROP_BUFFER_H
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
/*
|
||||
Stopwatch for timing and profiling for the Bullet Physics Library, http://bulletphysics.org
|
||||
Copyright (c) 2003-2011 Erwin Coumans
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include "btStopwatch.h"
|
||||
|
||||
|
||||
#ifdef __CELLOS_LV2__
|
||||
#include <sys/sys_time.h>
|
||||
#include <sys/time_util.h>
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#if defined (SUNOS) || defined (__SUNOS__)
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#if defined(WIN32) || defined(_WIN32)
|
||||
|
||||
#define BT_USE_WINDOWS_TIMERS
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define NOWINRES
|
||||
#define NOMCX
|
||||
#define NOIME
|
||||
|
||||
#ifdef _XBOX
|
||||
#include <Xtl.h>
|
||||
#else //_XBOX
|
||||
#include <windows.h>
|
||||
#endif //_XBOX
|
||||
|
||||
#include <time.h>
|
||||
|
||||
|
||||
#else //_WIN32
|
||||
#include <sys/time.h>
|
||||
#endif //_WIN32
|
||||
|
||||
// Smaller of the two arguments (the previous definition returned the larger one). Arguments may be evaluated twice.
#define mymin(a,b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
/// Platform-specific start-of-measurement state owned by a btStopwatch.
struct btStopwatchData
{
#if defined(BT_USE_WINDOWS_TIMERS)
	LARGE_INTEGER mClockFrequency;
	DWORD mStartTick;
	LONGLONG mPrevElapsedTime;
	LARGE_INTEGER mStartTime;
#elif defined(__CELLOS_LV2__)
	uint64_t	mStartTime;
#else
	struct timeval mStartTime;
#endif
};
|
||||
|
||||
|
||||
/// Allocates the timer state and starts measuring from now.
btStopwatch::btStopwatch()
{
	m_data = new btStopwatchData;

#ifdef BT_USE_WINDOWS_TIMERS
	// the counter frequency is queried once, at construction
	QueryPerformanceFrequency(&m_data->mClockFrequency);
#endif

	reset();
}
|
||||
|
||||
/// Frees the timer state allocated by the constructors.
btStopwatch::~btStopwatch()
{
	delete m_data;
}
|
||||
|
||||
/// Copy constructor: allocates separate timer state and copies the other stopwatch's values into it.
btStopwatch::btStopwatch(const btStopwatch& other)
{
	btStopwatchData* copy = new btStopwatchData;
	*copy = *other.m_data;
	m_data = copy;
}
|
||||
|
||||
/// Copies the other stopwatch's timer values into this object's existing state.
btStopwatch& btStopwatch::operator=(const btStopwatch& other)
{
	const btStopwatchData& src = *other.m_data;
	*m_data = src;
	return *this;
}
|
||||
|
||||
|
||||
/// Resets the initial reference time.
|
||||
void btStopwatch::reset()
|
||||
{
|
||||
#ifdef BT_USE_WINDOWS_TIMERS
|
||||
QueryPerformanceCounter(&m_data->mStartTime);
|
||||
m_data->mStartTick = GetTickCount();
|
||||
m_data->mPrevElapsedTime = 0;
|
||||
#else
|
||||
#ifdef __CELLOS_LV2__
|
||||
|
||||
typedef uint64_t ClockSize;
|
||||
ClockSize newTime;
|
||||
//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
|
||||
SYS_TIMEBASE_GET( newTime );
|
||||
m_data->mStartTime = newTime;
|
||||
#else
|
||||
gettimeofday(&m_data->mStartTime, 0);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Returns the time in ms since the last call to reset or since
|
||||
/// the btStopwatch was created.
|
||||
float btStopwatch::getTimeMilliseconds()
|
||||
{
|
||||
return getTimeMicroseconds()/1000.f;
|
||||
}
|
||||
|
||||
/// Returns the time in us since the last call to reset or since
|
||||
/// the stopwatch was created.
|
||||
unsigned long int btStopwatch::getTimeMicroseconds()
|
||||
{
|
||||
#ifdef BT_USE_WINDOWS_TIMERS
|
||||
LARGE_INTEGER currentTime;
|
||||
QueryPerformanceCounter(¤tTime);
|
||||
LONGLONG elapsedTime = currentTime.QuadPart - m_data->mStartTime.QuadPart;
|
||||
|
||||
// Compute the number of millisecond ticks elapsed.
|
||||
unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / m_data->mClockFrequency.QuadPart);
|
||||
|
||||
// Check for unexpected leaps in the Win32 performance counter.
|
||||
// (This is caused by unexpected data across the PCI to ISA
|
||||
// bridge, aka south bridge. See Microsoft KB274323.)
|
||||
unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
|
||||
signed long msecOff = (signed long)(msecTicks - elapsedTicks);
|
||||
if (msecOff < -100 || msecOff > 100)
|
||||
{
|
||||
// Adjust the starting time forwards.
|
||||
LONGLONG msecAdjustment = mymin(msecOff *
|
||||
m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
|
||||
m_data->mPrevElapsedTime);
|
||||
m_data->mStartTime.QuadPart += msecAdjustment;
|
||||
elapsedTime -= msecAdjustment;
|
||||
}
|
||||
|
||||
// Store the current elapsed time for adjustments next time.
|
||||
m_data->mPrevElapsedTime = elapsedTime;
|
||||
|
||||
// Convert to microseconds.
|
||||
unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime /
|
||||
m_data->mClockFrequency.QuadPart);
|
||||
|
||||
return usecTicks;
|
||||
#else
|
||||
|
||||
#ifdef __CELLOS_LV2__
|
||||
uint64_t freq=sys_time_get_timebase_frequency();
|
||||
double dFreq=((double) freq)/ 1000000.0;
|
||||
typedef uint64_t ClockSize;
|
||||
ClockSize newTime;
|
||||
//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
|
||||
SYS_TIMEBASE_GET( newTime );
|
||||
|
||||
return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
|
||||
#else
|
||||
|
||||
struct timeval currentTime;
|
||||
gettimeofday(¤tTime, 0);
|
||||
return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 + (currentTime.tv_usec - m_data->mStartTime.tv_usec);
|
||||
#endif//__CELLOS_LV2__
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
Stopwatch for timing and profiling for the Bullet Physics Library, http://bulletphysics.org
|
||||
Copyright (c) 2003-2011 Erwin Coumans
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef BT_STOPWATCH_H
|
||||
#define BT_STOPWATCH_H
|
||||
|
||||
///The btStopwatch is a portable basic clock that measures real-time, use for profiling etc.
|
||||
class btStopwatch
{
private:
	/// Platform-specific timer state, allocated by the constructors and freed by the destructor.
	struct btStopwatchData* m_data;

public:
	btStopwatch();
	btStopwatch(const btStopwatch& other);
	btStopwatch& operator=(const btStopwatch& other);
	~btStopwatch();

	/// Resets the initial reference time.
	void reset();

	/// Returns the time in ms since the last call to reset or since
	/// the btStopwatch was created.
	float getTimeMilliseconds();

	/// Returns the time in us since the last call to reset or since
	/// the btStopwatch was created.
	unsigned long int getTimeMicroseconds();
};
|
||||
|
||||
|
||||
#endif //BT_STOPWATCH_H
|
||||
@@ -0,0 +1,13 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
// Adds 0.01 to element (nodeID*4 + startOffset + 1) of the float buffer for each of
// the first numNodes work items.
__kernel void
interopKernel( const int startOffset, const int numNodes, __global float *g_vertexBuffer)
{
	int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
	{
		return;
	}
	int elemIndex = nodeID*4 + startOffset+1;
	g_vertexBuffer[elemIndex] += 0.01;
}
|
||||
|
||||
);
|
||||
1057
Extras/RigidBodyGpuPipeline/opencl/opengl_interop/main.cpp
Normal file
1057
Extras/RigidBodyGpuPipeline/opencl/opengl_interop/main.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,5 @@
|
||||
|
||||
include "AMD"
|
||||
include "Intel"
|
||||
-- include "NVIDIA"
|
||||
|
||||
19
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.cpp
Normal file
19
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#include <Adl/Adl.h>
|
||||
|
||||
//KernelManager* KernelManager::s_kManager = NULL;
|
||||
235
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.h
Normal file
235
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.h
Normal file
@@ -0,0 +1,235 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef ADL_H
|
||||
#define ADL_H
|
||||
|
||||
#pragma warning( disable : 4996 )
|
||||
#include <Adl/AdlConfig.h>
|
||||
#include <Adl/AdlError.h>
|
||||
#include <algorithm>
|
||||
|
||||
#ifndef max
|
||||
#define max(a,b) (((a) > (b)) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
#ifndef min
|
||||
#define min(a,b) (((a) < (b)) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
/// Backend a Device belongs to.
enum DeviceType
{
	TYPE_CL   = 0,
	TYPE_DX11 = 1,
	TYPE_HOST = 2,
};
|
||||
|
||||
|
||||
struct Device;
|
||||
|
||||
/// Non-template base of Buffer<T>; only carries the buffer kind enumeration.
struct BufferBase
{
	enum BufferType
	{
		BUFFER,				//	default kind

		BUFFER_CONST,		//	for dx
		BUFFER_STAGING,		//	for dx
		BUFFER_APPEND,		//	for dx
		BUFFER_RAW,			//	for dx
		BUFFER_W_COUNTER,	//	for dx
		BUFFER_INDEX,		//	for dx
		BUFFER_VERTEX,		//	for dx

		BUFFER_ZERO_COPY,	//	for cl
	};
};
|
||||
|
||||
class DeviceUtils
|
||||
{
|
||||
public:
|
||||
struct Config
|
||||
{
|
||||
enum DeviceType
|
||||
{
|
||||
DEVICE_GPU,
|
||||
DEVICE_CPU,
|
||||
};
|
||||
|
||||
// for CL
|
||||
enum DeviceVendor
|
||||
{
|
||||
VD_AMD,
|
||||
VD_INTEL,
|
||||
VD_NV,
|
||||
};
|
||||
|
||||
Config() : m_type(DEVICE_GPU), m_deviceIdx(0), m_vendor(VD_AMD){}
|
||||
|
||||
DeviceType m_type;
|
||||
int m_deviceIdx;
|
||||
DeviceVendor m_vendor;
|
||||
};
|
||||
|
||||
__inline
|
||||
static
|
||||
int getNDevices( DeviceType type );
|
||||
__inline
|
||||
static Device* allocate( DeviceType type, Config& cfg );
|
||||
__inline
|
||||
static void deallocate( Device* deviceData );
|
||||
__inline
|
||||
static void waitForCompletion( const Device* deviceData );
|
||||
};
|
||||
|
||||
//==========================
|
||||
// DeviceData
|
||||
//==========================
|
||||
struct Kernel;
|
||||
|
||||
struct Device
|
||||
{
|
||||
typedef DeviceUtils::Config Config;
|
||||
|
||||
Device( DeviceType type ) : m_type( type ), m_memoryUsage(0)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void* getContext() const { return 0; }
|
||||
virtual void initialize(const Config& cfg){}
|
||||
virtual void release(){}
|
||||
virtual void waitForCompletion() const {}
|
||||
virtual void getDeviceName( char nameOut[128] ) const {}
|
||||
virtual Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true ) const { ADLASSERT(0); return 0;}
|
||||
virtual unsigned int getUsedMemory() const { return m_memoryUsage; }
|
||||
|
||||
DeviceType m_type;
|
||||
unsigned int m_memoryUsage;
|
||||
};
|
||||
|
||||
//==========================
|
||||
// Buffer
|
||||
//==========================
|
||||
|
||||
template<typename T>
|
||||
struct HostBuffer;
|
||||
// overload each deviceDatas
|
||||
template<typename T>
|
||||
struct Buffer : public BufferBase
|
||||
{
|
||||
__inline
|
||||
Buffer();
|
||||
__inline
|
||||
Buffer(const Device* device, int nElems, BufferType type = BUFFER );
|
||||
__inline
|
||||
virtual ~Buffer();
|
||||
|
||||
__inline
|
||||
void setRawPtr( const Device* device, T* ptr, int size, BufferType type = BUFFER );
|
||||
__inline
|
||||
void allocate(const Device* device, int nElems, BufferType type = BUFFER );
|
||||
__inline
|
||||
void write(T* hostSrcPtr, int nElems, int dstOffsetNElems = 0);
|
||||
__inline
|
||||
void read(T* hostDstPtr, int nElems, int srcOffsetNElems = 0) const;
|
||||
__inline
|
||||
void write(Buffer<T>& src, int nElems);
|
||||
__inline
|
||||
void read(Buffer<T>& dst, int nElems) const;
|
||||
// __inline
|
||||
// Buffer<T>& operator = (const Buffer<T>& buffer);
|
||||
__inline
|
||||
int getSize() const { return m_size; }
|
||||
|
||||
DeviceType getType() const { ADLASSERT( m_device ); return m_device->m_type; }
|
||||
|
||||
|
||||
const Device* m_device;
|
||||
int m_size;
|
||||
T* m_ptr;
|
||||
// for DX11
|
||||
void* m_uav;
|
||||
void* m_srv;
|
||||
bool m_allocated; // todo. move this to a bit
|
||||
};
|
||||
|
||||
class BufferUtils
|
||||
{
|
||||
public:
|
||||
template<DeviceType TYPE, bool COPY, typename T>
|
||||
__inline
|
||||
static
|
||||
typename Buffer<T>* map(const Device* device, const Buffer<T>* in, int copySize = -1);
|
||||
|
||||
template<bool COPY, typename T>
|
||||
__inline
|
||||
static
|
||||
void unmap( Buffer<T>* native, const Buffer<T>* orig, int copySize = -1 );
|
||||
};
|
||||
|
||||
//==========================
|
||||
// HostBuffer
|
||||
//==========================
|
||||
struct DeviceHost;
|
||||
|
||||
template<typename T>
|
||||
struct HostBuffer : public Buffer<T>
|
||||
{
|
||||
__inline
|
||||
HostBuffer():Buffer<T>(){}
|
||||
__inline
|
||||
HostBuffer(const Device* device, int nElems, BufferType type = BUFFER ) : Buffer<T>(device, nElems, type) {}
|
||||
// HostBuffer(const Device* deviceData, T* rawPtr, int nElems);
|
||||
|
||||
|
||||
__inline
|
||||
T& operator[](int idx);
|
||||
__inline
|
||||
const T& operator[](int idx) const;
|
||||
__inline
|
||||
T* begin() { return m_ptr; }
|
||||
|
||||
__inline
|
||||
HostBuffer<T>& operator = (const Buffer<T>& device);
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#include <Adl/AdlKernel.h>
|
||||
#if defined(ADL_ENABLE_CL)
|
||||
#include <Adl/CL/AdlCL.inl>
|
||||
#endif
|
||||
#if defined(ADL_ENABLE_DX11)
|
||||
#include <Adl/DX11/AdlDX11.inl>
|
||||
#endif
|
||||
|
||||
#include <Adl/Host/AdlHost.inl>
|
||||
#include <Adl/AdlKernel.inl>
|
||||
#include <Adl/Adl.inl>
|
||||
|
||||
|
||||
#include <Adl/AdlStopwatch.h>
|
||||
|
||||
#include <Adl/Host/AdlStopwatchHost.inl>
|
||||
#include <Adl/AdlStopwatch.inl>
|
||||
|
||||
#endif
|
||||
344
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.inl
Normal file
344
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.inl
Normal file
@@ -0,0 +1,344 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
/// Returns the device count reported by the matching compiled-in backend,
/// or 1 for every other type (including TYPE_HOST).
int DeviceUtils::getNDevices( DeviceType type )
{
#if defined(ADL_ENABLE_CL)
	if( type == TYPE_CL )
	{
		return DeviceCL::getNDevices();
	}
#endif
#if defined(ADL_ENABLE_DX11)
	if( type == TYPE_DX11 )
	{
		return DeviceDX11::getNDevices();
	}
#endif
	return 1;
}
|
||||
|
||||
/// Creates the device object for the requested backend and initializes it with cfg.
/// Returns 0 when the type is unknown or its backend is not compiled in
/// (after asserting in builds where ADLASSERT is active).
Device* DeviceUtils::allocate( DeviceType type, Config& cfg )
{
	//	Start from null so the unsupported-type path never touches an indeterminate pointer.
	Device* deviceData = 0;
	switch( type )
	{
#if defined(ADL_ENABLE_CL)
	case TYPE_CL:
		deviceData = new DeviceCL();
		break;
#endif
#if defined(ADL_ENABLE_DX11)
	case TYPE_DX11:
		deviceData = new DeviceDX11();
		break;
#endif
	case TYPE_HOST:
		deviceData = new DeviceHost();
		break;
	default:
		ADLASSERT( 0 );
		break;
	}

	if( deviceData == 0 )
	{
		return 0;
	}

	deviceData->initialize( cfg );
	return deviceData;
}
|
||||
|
||||
/// Releases and deletes a device. Asserts that the device reports no memory in use.
/// A null pointer is accepted and ignored.
void DeviceUtils::deallocate( Device* deviceData )
{
	if( deviceData == 0 )
	{
		return;
	}

	ADLASSERT( deviceData->getUsedMemory() == 0 );
	deviceData->release();
	delete deviceData;
}
|
||||
|
||||
void DeviceUtils::waitForCompletion( const Device* deviceData )
|
||||
{
|
||||
deviceData->waitForCompletion();
|
||||
}
|
||||
|
||||
//	Per-backend case labels used by SELECT_DEVICEDATA / SELECT_DEVICEDATA1.
//	A backend that is not compiled in contributes no case, so its type reaches `default`.
#if defined(ADL_ENABLE_CL)
	#define ADL_SELECT_CASE_CL( devicePtr, func ) case TYPE_CL: ((DeviceCL*)devicePtr)->func; break;
#else
	#define ADL_SELECT_CASE_CL( devicePtr, func )
#endif

#if defined(ADL_ENABLE_DX11)
	#define ADL_SELECT_CASE_DX11( devicePtr, func ) case TYPE_DX11: ((DeviceDX11*)devicePtr)->func; break;
#else
	#define ADL_SELECT_CASE_DX11( devicePtr, func )
#endif

//	Calls `func` on m_device after casting it to the concrete device class selected by `type`.
#define SELECT_DEVICEDATA( type, func ) \
	switch( type ) \
	{ \
	ADL_SELECT_CASE_CL( m_device, func ) \
	ADL_SELECT_CASE_DX11( m_device, func ) \
	case TYPE_HOST: ((DeviceHost*)m_device)->func; break; \
	default: ADLASSERT(0); break; \
	}

//	Same dispatch, but on an explicitly named device pointer and its own m_type.
#define SELECT_DEVICEDATA1( deviceData, func ) \
	switch( deviceData->m_type ) \
	{ \
	ADL_SELECT_CASE_CL( deviceData, func ) \
	ADL_SELECT_CASE_DX11( deviceData, func ) \
	case TYPE_HOST: ((DeviceHost*)deviceData)->func; break; \
	default: ADLASSERT(0); break; \
	}
|
||||
|
||||
template<typename T>
|
||||
Buffer<T>::Buffer()
|
||||
{
|
||||
m_device = 0;
|
||||
m_size = 0;
|
||||
m_ptr = 0;
|
||||
|
||||
m_uav = 0;
|
||||
m_srv = 0;
|
||||
|
||||
m_allocated = false;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Buffer<T>::Buffer(const Device* deviceData, int nElems, BufferType type )
|
||||
{
|
||||
m_device = 0;
|
||||
allocate( deviceData, nElems, type );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Buffer<T>::~Buffer()
|
||||
{
|
||||
if( m_allocated )
|
||||
{
|
||||
if( m_device )
|
||||
SELECT_DEVICEDATA( m_device->m_type, deallocate( this ) );
|
||||
}
|
||||
|
||||
m_device = 0;
|
||||
m_ptr = 0;
|
||||
m_size = 0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::setRawPtr( const Device* device, T* ptr, int size, BufferType type )
|
||||
{
|
||||
ADLASSERT( m_device == 0 );
|
||||
ADLASSERT( type == BUFFER ); // todo. implement
|
||||
ADLASSERT( device->m_type != TYPE_DX11 ); // todo. implement set srv, uav
|
||||
|
||||
m_device = device;
|
||||
m_ptr = ptr;
|
||||
m_size = size;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::allocate(const Device* deviceData, int nElems, BufferType type )
|
||||
{
|
||||
ADLASSERT( m_device == 0 );
|
||||
m_device = deviceData;
|
||||
m_size = 0;
|
||||
m_ptr = 0;
|
||||
|
||||
m_uav = 0;
|
||||
m_srv = 0;
|
||||
|
||||
SELECT_DEVICEDATA( m_device->m_type, allocate( this, nElems, type ) );
|
||||
m_allocated = true;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::write(T* hostPtr, int nElems, int offsetNElems)
|
||||
{
|
||||
ADLASSERT( nElems+offsetNElems <= m_size );
|
||||
SELECT_DEVICEDATA( m_device->m_type, copy(this, hostPtr, nElems, offsetNElems) );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::read(T* hostPtr, int nElems, int offsetNElems) const
|
||||
{
|
||||
SELECT_DEVICEDATA( m_device->m_type, copy(hostPtr,this, nElems, offsetNElems) );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::write(Buffer<T>& src, int nElems)
|
||||
{
|
||||
ADLASSERT( nElems <= m_size );
|
||||
SELECT_DEVICEDATA( m_device->m_type, copy(this, &src, nElems) );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::read(Buffer<T>& dst, int nElems) const
|
||||
{
|
||||
SELECT_DEVICEDATA( m_device->m_type, copy(&dst, this, nElems) );
|
||||
}
|
||||
/*
|
||||
template<typename T>
|
||||
Buffer<T>& Buffer<T>::operator = ( const Buffer<T>& buffer )
|
||||
{
|
||||
// ADLASSERT( buffer.m_size <= m_size );
|
||||
|
||||
SELECT_DEVICEDATA( m_device->m_type, copy(this, &buffer, min2( m_size, buffer.m_size) ) );
|
||||
|
||||
return *this;
|
||||
}
|
||||
*/
|
||||
|
||||
template<DeviceType TYPE, bool COPY, typename T>
|
||||
__inline
|
||||
static
|
||||
typename Buffer<T>* BufferUtils::map(const Device* device, const Buffer<T>* in, int copySize)
|
||||
{
|
||||
Buffer<T>* native;
|
||||
ADLASSERT( device->m_type == TYPE );
|
||||
|
||||
if( in->getType() == TYPE )
|
||||
native = (Buffer<T>*)in;
|
||||
else
|
||||
{
|
||||
ADLASSERT( copySize <= in->getSize() );
|
||||
copySize = (copySize==-1)? in->getSize() : copySize;
|
||||
|
||||
native = new Buffer<T>( device, copySize );
|
||||
if( COPY )
|
||||
{
|
||||
if( in->getType() == TYPE_HOST )
|
||||
native->write( in->m_ptr, copySize );
|
||||
else if( native->getType() == TYPE_HOST )
|
||||
{
|
||||
in->read( native->m_ptr, copySize );
|
||||
DeviceUtils::waitForCompletion( in->m_device );
|
||||
}
|
||||
else
|
||||
{
|
||||
T* tmp = new T[copySize];
|
||||
in->read( tmp, copySize );
|
||||
DeviceUtils::waitForCompletion( in->m_device );
|
||||
native->write( tmp, copySize );
|
||||
DeviceUtils::waitForCompletion( native->m_device );
|
||||
delete [] tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
return native;
|
||||
}
|
||||
|
||||
template<bool COPY, typename T>
|
||||
__inline
|
||||
static
|
||||
void BufferUtils::unmap( Buffer<T>* native, const Buffer<T>* orig, int copySize )
|
||||
{
|
||||
if( native != orig )
|
||||
{
|
||||
if( COPY )
|
||||
{
|
||||
copySize = (copySize==-1)? orig->getSize() : copySize;
|
||||
ADLASSERT( copySize <= orig->getSize() );
|
||||
if( orig->getType() == TYPE_HOST )
|
||||
{
|
||||
native->read( orig->m_ptr, copySize );
|
||||
DeviceUtils::waitForCompletion( native->m_device );
|
||||
}
|
||||
else if( native->getType() == TYPE_HOST )
|
||||
{
|
||||
Buffer<T>* dst = (Buffer<T>*)orig;
|
||||
dst->write( native->m_ptr, copySize );
|
||||
DeviceUtils::waitForCompletion( dst->m_device );
|
||||
}
|
||||
else
|
||||
{
|
||||
T* tmp = new T[copySize];
|
||||
native->read( tmp, copySize );
|
||||
DeviceUtils::waitForCompletion( native->m_device );
|
||||
Buffer<T>* dst = (Buffer<T>*)orig;
|
||||
dst->write( tmp, copySize );
|
||||
DeviceUtils::waitForCompletion( dst->m_device );
|
||||
delete [] tmp;
|
||||
}
|
||||
}
|
||||
delete native;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
T& HostBuffer<T>::operator[](int idx)
|
||||
{
|
||||
return m_ptr[idx];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const T& HostBuffer<T>::operator[](int idx) const
|
||||
{
|
||||
return m_ptr[idx];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
HostBuffer<T>& HostBuffer<T>::operator = ( const Buffer<T>& device )
|
||||
{
|
||||
ADLASSERT( device.m_size <= m_size );
|
||||
|
||||
SELECT_DEVICEDATA1( device.m_device, copy( m_ptr, &device, device.m_size ) );
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
#undef SELECT_DEVICEDATA
|
||||
|
||||
};
|
||||
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
//ADL_ENABLE_CL and ADL_ENABLE_DX11 can be set in the build system using C/C++ preprocessor defines
|
||||
//#define ADL_ENABLE_CL
|
||||
//#define ADL_ENABLE_DX11
|
||||
|
||||
//#define ADL_CL_FORCE_UNCACHE_KERNEL
|
||||
#define ADL_CL_DUMP_MEMORY_LOG
|
||||
|
||||
//load the kernels from string instead of loading them from file
|
||||
#define ADL_LOAD_KERNEL_FROM_STRING
|
||||
#define ADL_DUMP_DX11_ERROR
|
||||
80
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlError.h
Normal file
80
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlError.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef ADL_ERROR_H
|
||||
#define ADL_ERROR_H
|
||||
|
||||
#if defined(ADL_DUMP_DX11_ERROR)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#ifdef _DEBUG
|
||||
#include <assert.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
//	ADLASSERT(x): debug builds break into the debugger when x is false.
//	Release builds still evaluate x but do nothing with the result.
//	Both forms are wrapped in do { } while(0) so the macro behaves like a single
//	statement: it can be followed by `else` and never captures a following one.
#ifdef _DEBUG
#define ADLASSERT(x) do { if(!(x)){ __debugbreak(); } } while(0)
#else
#define ADLASSERT(x) do { if(x){} } while(0)
#endif
|
||||
|
||||
//	COMPILE_TIME_ASSERT(x): debug builds refuse to compile when the constant
//	expression x is false. A false condition yields a negative array size,
//	which every compiler rejects (a zero-sized array is accepted by some).
//	Release builds expand to nothing.
#ifdef _DEBUG
#define COMPILE_TIME_ASSERT(x) {int compileTimeAssertFailed[(x)?1:-1]; (void)compileTimeAssertFailed;}
#else
#define COMPILE_TIME_ASSERT(x)
#endif
|
||||
|
||||
//	printf-style trace output.
//	Debug builds: with ADL_DUMP_DX11_ERROR defined the formatted text is sent to
//	the debugger via OutputDebugString, otherwise it is printed with vprintf.
//	Release builds: does nothing.
#ifdef _DEBUG
__inline
void debugPrintf(const char *fmt, ...)
{
	va_list arg;
	va_start(arg, fmt);
#if defined(ADL_DUMP_DX11_ERROR)
	const int size = 1024*10;
	char buf[size];
	//	messages longer than the buffer are truncated instead of triggering the
	//	CRT invalid-parameter handler
	_vsnprintf_s( buf, size, _TRUNCATE, fmt, arg );
	//	the ANSI entry point is called directly, so no UNICODE-dependent
	//	conversion of the text is needed
	OutputDebugStringA( buf );
#else
	vprintf(fmt, arg);
#endif
	va_end(arg);
}
#else
__inline
void debugPrintf(const char *fmt, ...)
{
	(void)fmt;
}
#endif
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
142
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.h
Normal file
142
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.h
Normal file
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef ADL_KERNEL_H
|
||||
#define ADL_KERNEL_H
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
//==========================
|
||||
// Kernel
|
||||
//==========================
|
||||
struct Kernel
|
||||
{
|
||||
DeviceType m_type;
|
||||
void* m_kernel;
|
||||
};
|
||||
|
||||
//==========================
|
||||
// KernelManager
|
||||
//==========================
|
||||
class KernelManager
|
||||
{
|
||||
public:
|
||||
typedef std::map<std::string, Kernel*> KMap;
|
||||
|
||||
__inline
|
||||
~KernelManager();
|
||||
|
||||
__inline
|
||||
// static
|
||||
Kernel* query(const Device* dd, const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL,
|
||||
bool cacheKernel = true);
|
||||
|
||||
public:
|
||||
KMap m_map;
|
||||
};
|
||||
|
||||
//==========================
|
||||
// Launcher
|
||||
//==========================
|
||||
class Launcher
|
||||
{
|
||||
public:
|
||||
struct BufferInfo
|
||||
{
|
||||
BufferInfo(){}
|
||||
template<typename T>
|
||||
BufferInfo(Buffer<T>* buff, bool isReadOnly = false): m_buffer(buff), m_isReadOnly(isReadOnly){}
|
||||
|
||||
void* m_buffer;
|
||||
bool m_isReadOnly;
|
||||
};
|
||||
|
||||
__inline
|
||||
Launcher(const Device* dd, char* fileName, char* funcName, char* option = NULL);
|
||||
__inline
|
||||
Launcher(const Device* dd, Kernel* kernel);
|
||||
__inline
|
||||
void setBuffers( BufferInfo* buffInfo, int n );
|
||||
template<typename T>
|
||||
__inline
|
||||
void setConst( Buffer<T>& constBuff, const T& consts );
|
||||
__inline
|
||||
void launch1D( int numThreads, int localSize = 64 );
|
||||
__inline
|
||||
void launch2D( int numThreadsX, int numThreadsY, int localSizeX = 8, int localSizeY = 8 );
|
||||
|
||||
public:
|
||||
enum
|
||||
{
|
||||
CONST_BUFFER_SIZE = 512,
|
||||
};
|
||||
|
||||
const Device* m_deviceData;
|
||||
Kernel* m_kernel;
|
||||
int m_idx;
|
||||
int m_idxRw;
|
||||
};
|
||||
|
||||
template<DeviceType TYPE>
|
||||
class KernelBuilder
|
||||
{
|
||||
public:
|
||||
|
||||
__inline
|
||||
KernelBuilder(): m_ptr(0){}
|
||||
|
||||
__inline
|
||||
void setFromFile( const Device* deviceData, const char* fileName, const char* option = NULL, bool addExtension = false,
|
||||
bool cacheKernel = true);
|
||||
|
||||
__inline
|
||||
void setFromSrc( const Device* deviceData, const char* src, const char* option = NULL );
|
||||
|
||||
__inline
|
||||
void setFromSrcCached( const Device* deviceData, const char* src, const char* fileName, const char* option );
|
||||
|
||||
|
||||
__inline
|
||||
void createKernel( const char* funcName, Kernel& kernelOut );
|
||||
|
||||
__inline
|
||||
~KernelBuilder();
|
||||
// todo. implemement in kernel destructor?
|
||||
__inline
|
||||
static void deleteKernel( Kernel& kernel );
|
||||
|
||||
private:
|
||||
enum
|
||||
{
|
||||
MAX_PATH_LENGTH = 260,
|
||||
};
|
||||
const Device* m_deviceData;
|
||||
#ifdef UNICODE
|
||||
wchar_t m_path[MAX_PATH_LENGTH];
|
||||
#else
|
||||
char m_path[MAX_PATH_LENGTH];
|
||||
#endif
|
||||
void* m_ptr;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif //ADL_KERNEL_H
|
||||
223
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.inl
Normal file
223
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.inl
Normal file
@@ -0,0 +1,223 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
#ifdef ADL_ENABLE_CL
|
||||
#include <Adl/CL/AdlKernelUtilsCL.inl>
|
||||
#endif
|
||||
#ifdef ADL_ENABLE_DX11
|
||||
#include <Adl/DX11/AdlKernelUtilsDX11.inl>
|
||||
#endif
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
//==========================
|
||||
// KernelManager
|
||||
//==========================
|
||||
//	Returns the kernel funcName of fileName for device dd.
//	Kernels are kept in m_map under a key made of the device context, the file
//	name (with the backend's extension), the function name and the build
//	option; a kernel is built only when its key is not in the map yet.
//	- src:         when given, the kernel is built from this source string
//	               instead of the file
//	- cacheKernel: forwarded to the builder (setFromSrcCached / setFromFile)
//	A device type whose backend is not compiled in ends up in the asserting
//	default branches.
Kernel* KernelManager::query(const Device* dd, const char* fileName, const char* funcName, const char* option, const char* src,
	bool cacheKernel)
{
	printf("compiling kernel %s",funcName);
	const int charSize = 1024*2;
	KernelManager* s_kManager = this;

	//	stays an empty string when the device type has no case below
	char fullFineName[charSize];
	fullFineName[0] = 0;
	switch( dd->m_type )
	{
#if defined(ADL_ENABLE_CL)
	case TYPE_CL:
		sprintf_s(fullFineName,charSize,"%s.cl", fileName);
		break;
#endif
#if defined(ADL_ENABLE_DX11)
	case TYPE_DX11:
		sprintf_s(fullFineName,charSize,"%s.hlsl", fileName);
		break;
#endif
	default:
		ADLASSERT(0);
		break;
	};

	//	the context is formatted as a pointer: casting it to int would drop the
	//	upper half on 64 bit targets
	char mapName[charSize];
	sprintf_s(mapName, charSize, "%p%s%s%s", (const void*)dd->getContext(), fullFineName, funcName, option ? option : "");

	std::string str(mapName);

	KMap::iterator iter = s_kManager->m_map.find( str );
	if( iter != s_kManager->m_map.end() )
	{
		printf(" ready\n");
		return iter->second;
	}

	Kernel* kernelOut = new Kernel();

	switch( dd->m_type )
	{
#if defined(ADL_ENABLE_CL)
	case TYPE_CL:
		{
			KernelBuilder<TYPE_CL> builder;
			if( src )
			{
				if (cacheKernel)
				{
					builder.setFromSrcCached( dd, src, fileName, option );
				}
				else
				{
					builder.setFromSrc( dd, src, option );
				}
			}
			else
			{
				builder.setFromFile( dd, fileName, option, true, cacheKernel );
			}
			builder.createKernel( funcName, *kernelOut );
		}
		break;
#endif
#if defined(ADL_ENABLE_DX11)
	case TYPE_DX11:
		{
			KernelBuilder<TYPE_DX11> builder;
			if( src )
			{
				builder.setFromSrc( dd, src, option );
			}
			else
			{
				builder.setFromFile( dd, fileName, option, true, cacheKernel );
			}
			builder.createKernel( funcName, *kernelOut );
		}
		break;
#endif
	default:
		ADLASSERT(0);
		break;
	};
	s_kManager->m_map.insert( KMap::value_type(str,kernelOut) );

	printf(" ready\n");
	return kernelOut;
}
|
||||
|
||||
//	Destroys every kernel stored in the map.
//	The backend object is released through the matching KernelBuilder; the
//	Kernel record itself is deleted in every case, including the asserting
//	default path, so that it never leaks.
KernelManager::~KernelManager()
{
	for(KMap::iterator iter = m_map.begin(); iter != m_map.end(); ++iter)
	{
		Kernel* k = iter->second;
		switch( k->m_type )
		{
#if defined(ADL_ENABLE_CL)
		case TYPE_CL:
			KernelBuilder<TYPE_CL>::deleteKernel( *k );
			break;
#endif
#if defined(ADL_ENABLE_DX11)
		case TYPE_DX11:
			KernelBuilder<TYPE_DX11>::deleteKernel( *k );
			break;
#endif
		default:
			ADLASSERT(0);
			break;
		};
		delete k;
	}
}
|
||||
|
||||
//==========================
|
||||
// Launcher
|
||||
//==========================
|
||||
|
||||
//	SELECT_LAUNCHER( type, func ) calls the static function `func` of the
//	launcher class that matches `type`. Only the backends that are compiled in
//	get a case; any other type reaches the asserting default.
//	The switch is wrapped in do { } while(0) so that the macro, followed by the
//	usual semicolon, expands to exactly one statement.
#if defined(ADL_ENABLE_CL) && defined(ADL_ENABLE_DX11)
#define SELECT_LAUNCHER( type, func ) \
	do { \
		switch( type ) \
		{ \
		case TYPE_CL: LauncherCL::func; break; \
		case TYPE_DX11: LauncherDX11::func; break; \
		default: ADLASSERT(0); break; \
		} \
	} while(0)
#elif defined(ADL_ENABLE_DX11)
#define SELECT_LAUNCHER( type, func ) \
	do { \
		switch( type ) \
		{ \
		case TYPE_DX11: LauncherDX11::func; break; \
		default: ADLASSERT(0); break; \
		} \
	} while(0)
#elif defined(ADL_ENABLE_CL)
#define SELECT_LAUNCHER( type, func ) \
	do { \
		switch( type ) \
		{ \
		case TYPE_CL: LauncherCL::func; break; \
		default: ADLASSERT(0); break; \
		} \
	} while(0)
#else
#define SELECT_LAUNCHER( type, func ) \
	do { \
		switch( type ) \
		{ \
		default: ADLASSERT(0); break; \
		} \
	} while(0)
#endif
|
||||
|
||||
//	Looks the kernel up on the device (by file name, function name and build
//	option) and starts with both argument counters at zero.
Launcher::Launcher(const Device *dd, char *fileName, char *funcName, char *option)
	: m_deviceData( dd )
	, m_kernel( dd->getKernel( fileName, funcName, option ) )
	, m_idx( 0 )
	, m_idxRw( 0 )
{
}
|
||||
|
||||
//	Wraps an already created kernel; both argument counters start at zero.
Launcher::Launcher(const Device* dd, Kernel* kernel)
	: m_deviceData( dd )
	, m_kernel( kernel )
	, m_idx( 0 )
	, m_idxRw( 0 )
{
}
|
||||
|
||||
//	Passes the n buffer descriptions in buffInfo to the launcher of the
//	device's backend.
void Launcher::setBuffers( BufferInfo* buffInfo, int n )
{
	const Device* device = m_deviceData;
	SELECT_LAUNCHER( device->m_type, setBuffers( this, buffInfo, n ) );
}
|
||||
|
||||
template<typename T>
|
||||
void Launcher::setConst( Buffer<T>& constBuff, const T& consts )
|
||||
{
|
||||
SELECT_LAUNCHER( m_deviceData->m_type, setConst( this, constBuff, consts ) );
|
||||
}
|
||||
|
||||
void Launcher::launch1D( int numThreads, int localSize )
|
||||
{
|
||||
SELECT_LAUNCHER( m_deviceData->m_type, launch2D( this, numThreads, 1, localSize, 1 ) );
|
||||
}
|
||||
|
||||
void Launcher::launch2D( int numThreadsX, int numThreadsY, int localSizeX, int localSizeY )
|
||||
{
|
||||
SELECT_LAUNCHER( m_deviceData->m_type, launch2D( this, numThreadsX, numThreadsY, localSizeX, localSizeY ) );
|
||||
}
|
||||
|
||||
#undef SELECT_LAUNCHER
|
||||
|
||||
};
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
struct StopwatchBase
|
||||
{
|
||||
__inline
|
||||
StopwatchBase(): m_device(0){}
|
||||
__inline
|
||||
StopwatchBase( const Device* deviceData ){ init(deviceData); }
|
||||
__inline
|
||||
virtual ~StopwatchBase(){}
|
||||
|
||||
__inline
|
||||
virtual void init( const Device* deviceData ) = 0;
|
||||
__inline
|
||||
virtual void start() = 0;
|
||||
__inline
|
||||
virtual void split() = 0;
|
||||
__inline
|
||||
virtual void stop() = 0;
|
||||
__inline
|
||||
virtual float getMs(int index=0) = 0;
|
||||
__inline
|
||||
virtual void getMs( float* times, int capacity ) = 0;
|
||||
__inline
|
||||
int getNIntervals() const{ return m_idx-1;}
|
||||
|
||||
enum
|
||||
{
|
||||
CAPACITY = 64,
|
||||
};
|
||||
|
||||
const Device* m_device;
|
||||
int m_idx;
|
||||
};
|
||||
|
||||
struct Stopwatch
|
||||
{
|
||||
__inline
|
||||
Stopwatch( const Device* deviceData = NULL ) { m_impl=0; if(deviceData) init(deviceData);}
|
||||
__inline
|
||||
~Stopwatch();
|
||||
|
||||
__inline
|
||||
void init( const Device* deviceData );
|
||||
__inline
|
||||
void start(){if(!m_impl) init(0); m_impl->start();}
|
||||
__inline
|
||||
void split(){m_impl->split();}
|
||||
__inline
|
||||
void stop(){m_impl->stop();}
|
||||
__inline
|
||||
float getMs(){ return m_impl->getMs();}
|
||||
__inline
|
||||
void getMs( float* times, int capacity ){m_impl->getMs(times, capacity);}
|
||||
__inline
|
||||
int getNIntervals() const{return m_impl->getNIntervals();}
|
||||
|
||||
StopwatchBase* m_impl;
|
||||
};
|
||||
|
||||
};
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
void Stopwatch::init( const Device* deviceData )
|
||||
{
|
||||
ADLASSERT( m_impl == 0 );
|
||||
|
||||
if( deviceData )
|
||||
{
|
||||
switch( deviceData->m_type )
|
||||
{
|
||||
#if defined(ADL_ENABLE_CL)
|
||||
case TYPE_CL:
|
||||
m_impl = new StopwatchHost;//StopwatchCL
|
||||
break;
|
||||
#endif
|
||||
#if defined(ADL_ENABLE_DX11)
|
||||
case TYPE_DX11:
|
||||
m_impl = new StopwatchHost;//StopwatchDX11;
|
||||
break;
|
||||
#endif
|
||||
case TYPE_HOST:
|
||||
m_impl = new StopwatchHost;
|
||||
break;
|
||||
default:
|
||||
ADLASSERT(0);
|
||||
break;
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
m_impl = new StopwatchHost;
|
||||
}
|
||||
m_impl->init( deviceData );
|
||||
}
|
||||
|
||||
//	Destroys the implementation, if one was created.
Stopwatch::~Stopwatch()
{
	//	deleting a null pointer is a no-op
	delete m_impl;
}
|
||||
|
||||
};
|
||||
384
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlCL.inl
Normal file
384
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlCL.inl
Normal file
@@ -0,0 +1,384 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
#pragma comment(lib,"OpenCL.lib")
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_ext.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
struct DeviceCL : public Device
|
||||
{
|
||||
typedef DeviceUtils::Config Config;
|
||||
|
||||
|
||||
__inline
|
||||
DeviceCL() : Device( TYPE_CL ), m_kernelManager(0){}
|
||||
__inline
|
||||
void* getContext() const { return m_context; }
|
||||
__inline
|
||||
void initialize(const Config& cfg);
|
||||
__inline
|
||||
void release();
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void deallocate(Buffer<T>* buf);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const Buffer<T>* src, int nElems,int srcOffsetNElems = 0,int dstOffsetNElems = 0);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems = 0);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems = 0);
|
||||
|
||||
__inline
|
||||
void waitForCompletion() const;
|
||||
|
||||
__inline
|
||||
void getDeviceName( char nameOut[128] ) const;
|
||||
|
||||
__inline
|
||||
static
|
||||
int getNDevices();
|
||||
|
||||
__inline
|
||||
Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true )const;
|
||||
|
||||
|
||||
enum
|
||||
{
|
||||
MAX_NUM_DEVICES = 6,
|
||||
};
|
||||
|
||||
cl_context m_context;
|
||||
cl_command_queue m_commandQueue;
|
||||
|
||||
cl_device_id m_deviceIdx;
|
||||
|
||||
KernelManager* m_kernelManager;
|
||||
};
|
||||
|
||||
//===
|
||||
//===
|
||||
|
||||
void DeviceCL::initialize(const Config& cfg)
|
||||
{
|
||||
// DeviceUtils::create( cfg, (DeviceCL*)this );
|
||||
{
|
||||
// dd = new DeviceCL();
|
||||
|
||||
DeviceCL* deviceData = (DeviceCL*)this;
|
||||
|
||||
// cl_device_type deviceType = (driverType == DRIVER_HARDWARE)? CL_DEVICE_TYPE_GPU:CL_DEVICE_TYPE_CPU;
|
||||
cl_device_type deviceType = (cfg.m_type== Config::DEVICE_GPU)? CL_DEVICE_TYPE_GPU: CL_DEVICE_TYPE_CPU;
|
||||
// int numContextQueuePairsToCreate = 1;
|
||||
bool enableProfiling = false;
|
||||
#ifdef _DEBUG
|
||||
enableProfiling = true;
|
||||
#endif
|
||||
cl_int status;
|
||||
|
||||
cl_platform_id platform;
|
||||
{
|
||||
cl_uint nPlatforms = 0;
|
||||
status = clGetPlatformIDs(0, NULL, &nPlatforms);
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
cl_platform_id pIdx[5];
|
||||
status = clGetPlatformIDs(nPlatforms, pIdx, NULL);
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
cl_uint atiIdx = -1;
|
||||
cl_uint intelIdx = -1;
|
||||
cl_uint nvIdx = -1;
|
||||
|
||||
for(cl_uint i=0; i<nPlatforms; i++)
|
||||
{
|
||||
char buff[512];
|
||||
status = clGetPlatformInfo( pIdx[i], CL_PLATFORM_VENDOR, 512, buff, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
//skip the platform if there are no devices available
|
||||
cl_uint numDevice;
|
||||
status = clGetDeviceIDs( pIdx[i], deviceType, 0, NULL, &numDevice );
|
||||
if (numDevice>0)
|
||||
{
|
||||
if( strcmp( buff, "NVIDIA Corporation" )==0 ) nvIdx = i;
|
||||
if( strcmp( buff, "Advanced Micro Devices, Inc." )==0 ) atiIdx = i;
|
||||
if( strcmp( buff, "Intel(R) Corporation" )==0 ) intelIdx = i;
|
||||
}
|
||||
}
|
||||
|
||||
if( deviceType == CL_DEVICE_TYPE_GPU )
|
||||
{
|
||||
switch( cfg.m_vendor )
|
||||
{
|
||||
case DeviceUtils::Config::VD_AMD:
|
||||
if( atiIdx == -1 && nvIdx != -1 ) goto USE_NV_GPU;
|
||||
USE_AMD_GPU:
|
||||
ADLASSERT(atiIdx != -1 );
|
||||
platform = pIdx[atiIdx];
|
||||
break;
|
||||
case DeviceUtils::Config::VD_NV:
|
||||
if( atiIdx != -1 && nvIdx == -1 ) goto USE_AMD_GPU;
|
||||
USE_NV_GPU:
|
||||
ADLASSERT(nvIdx != -1 );
|
||||
platform = pIdx[nvIdx];
|
||||
break;
|
||||
default:
|
||||
ADLASSERT(0);
|
||||
break;
|
||||
};
|
||||
}
|
||||
else if( deviceType == CL_DEVICE_TYPE_CPU )
|
||||
{
|
||||
switch( cfg.m_vendor )
|
||||
{
|
||||
case DeviceUtils::Config::VD_AMD:
|
||||
ADLASSERT(atiIdx != -1 );
|
||||
platform = pIdx[atiIdx];
|
||||
break;
|
||||
case DeviceUtils::Config::VD_INTEL:
|
||||
ADLASSERT(intelIdx != -1 );
|
||||
platform = pIdx[intelIdx];
|
||||
break;
|
||||
default:
|
||||
ADLASSERT(0);
|
||||
break;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
cl_uint numDevice;
|
||||
status = clGetDeviceIDs( platform, deviceType, 0, NULL, &numDevice );
|
||||
|
||||
// ADLASSERT( cfg.m_deviceIdx < (int)numDevice );
|
||||
|
||||
debugPrintf("CL: %d %s Devices ", numDevice, (deviceType==CL_DEVICE_TYPE_GPU)? "GPU":"CPU");
|
||||
|
||||
// numContextQueuePairsToCreate = min( (int)numDevice, numContextQueuePairsToCreate );
|
||||
// numContextQueuePairsToCreate = ( (int)numDevice < numContextQueuePairsToCreate )? numDevice : numContextQueuePairsToCreate;
|
||||
|
||||
cl_device_id deviceIds[ MAX_NUM_DEVICES ];
|
||||
|
||||
status = clGetDeviceIDs( platform, deviceType, numDevice, deviceIds, NULL );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
{ int i = min( (int)numDevice-1, cfg.m_deviceIdx );
|
||||
m_deviceIdx = deviceIds[i];
|
||||
deviceData->m_context = clCreateContext( NULL, 1, &deviceData->m_deviceIdx, NULL, NULL, &status );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
char buff[512];
|
||||
status = clGetDeviceInfo( deviceData->m_deviceIdx, CL_DEVICE_NAME, sizeof(buff), &buff, NULL );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
debugPrintf("[%s]\n", buff);
|
||||
|
||||
deviceData->m_commandQueue = clCreateCommandQueue( deviceData->m_context, deviceData->m_deviceIdx, (enableProfiling)?CL_QUEUE_PROFILING_ENABLE:NULL, NULL );
|
||||
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
// status = clSetCommandQueueProperty( commandQueue, CL_QUEUE_PROFILING_ENABLE, CL_TRUE, 0 );
|
||||
// CLASSERT( status == CL_SUCCESS );
|
||||
|
||||
if(0)
|
||||
{
|
||||
cl_bool image_support;
|
||||
clGetDeviceInfo(deviceData->m_deviceIdx, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL);
|
||||
debugPrintf(" CL_DEVICE_IMAGE_SUPPORT : %s\n", image_support?"Yes":"No");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_kernelManager = new KernelManager;
|
||||
}
|
||||
|
||||
void DeviceCL::release()
|
||||
{
|
||||
clReleaseCommandQueue( m_commandQueue );
|
||||
clReleaseContext( m_context );
|
||||
|
||||
if( m_kernelManager ) delete m_kernelManager;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceCL::allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type)
|
||||
{
|
||||
buf->m_device = this;
|
||||
buf->m_size = nElems;
|
||||
buf->m_ptr = 0;
|
||||
|
||||
if( type == BufferBase::BUFFER_CONST ) return;
|
||||
|
||||
#if defined(ADL_CL_DUMP_MEMORY_LOG)
|
||||
char deviceName[256];
|
||||
getDeviceName( deviceName );
|
||||
printf( "adlCLMemoryLog %s : %3.2fMB Allocation: %3.2fKB ", deviceName, m_memoryUsage/1024.f/1024.f, sizeof(T)*nElems/1024.f );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
int sz=sizeof(T)*nElems;
|
||||
|
||||
cl_int status = 0;
|
||||
if( type == BufferBase::BUFFER_ZERO_COPY )
|
||||
buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sz, 0, &status );
|
||||
else if( type == BufferBase::BUFFER_RAW )
|
||||
buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_WRITE_ONLY, sz, 0, &status );
|
||||
else
|
||||
buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_READ_WRITE, sz, 0, &status );
|
||||
|
||||
m_memoryUsage += buf->m_size*sizeof(T);
|
||||
#if defined(ADL_CL_DUMP_MEMORY_LOG)
|
||||
printf( "%s\n", (status==CL_SUCCESS)? "Succeed": "Failed" );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceCL::deallocate(Buffer<T>* buf)
|
||||
{
|
||||
if( buf->m_ptr )
|
||||
{
|
||||
m_memoryUsage -= buf->m_size*sizeof(T);
|
||||
clReleaseMemObject( (cl_mem)buf->m_ptr );
|
||||
}
|
||||
buf->m_device = 0;
|
||||
buf->m_size = 0;
|
||||
buf->m_ptr = 0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceCL::copy(Buffer<T>* dst, const Buffer<T>* src, int nElems,int srcOffsetNElems,int dstOffsetNElems )
|
||||
{
|
||||
if( dst->m_device->m_type == TYPE_CL && src->m_device->m_type == TYPE_CL )
|
||||
{
|
||||
cl_int status = 0;
|
||||
status = clEnqueueCopyBuffer( m_commandQueue, (cl_mem)src->m_ptr, (cl_mem)dst->m_ptr, sizeof(T)*srcOffsetNElems, sizeof(T)*dstOffsetNElems, sizeof(T)*nElems, 0, 0, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
else if( src->m_device->m_type == TYPE_HOST )
|
||||
{
|
||||
ADLASSERT( dst->getType() == TYPE_CL );
|
||||
dst->write( src->m_ptr, nElems );
|
||||
}
|
||||
else if( dst->m_device->m_type == TYPE_HOST )
|
||||
{
|
||||
ADLASSERT( src->getType() == TYPE_CL );
|
||||
src->read( dst->m_ptr, nElems );
|
||||
}
|
||||
else
|
||||
{
|
||||
ADLASSERT( 0 );
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceCL::copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems )
|
||||
{
|
||||
cl_int status = 0;
|
||||
status = clEnqueueReadBuffer( m_commandQueue, (cl_mem)src->m_ptr, 0, sizeof(T)*srcOffsetNElems, sizeof(T)*nElems,
|
||||
dst, 0,0,0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceCL::copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems )
|
||||
{
|
||||
cl_int status = 0;
|
||||
int sz=sizeof(T)*nElems;
|
||||
status = clEnqueueWriteBuffer( m_commandQueue, (cl_mem)dst->m_ptr, 0, sizeof(T)*dstOffsetNElems, sz,
|
||||
src, 0,0,0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
void DeviceCL::waitForCompletion() const
|
||||
{
|
||||
clFinish( m_commandQueue );
|
||||
}
|
||||
|
||||
int DeviceCL::getNDevices()
|
||||
{
|
||||
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
|
||||
cl_int status;
|
||||
|
||||
cl_platform_id platform;
|
||||
{
|
||||
cl_uint nPlatforms = 0;
|
||||
status = clGetPlatformIDs(0, NULL, &nPlatforms);
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
cl_platform_id pIdx[5];
|
||||
status = clGetPlatformIDs(nPlatforms, pIdx, NULL);
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
cl_uint nvIdx = -1;
|
||||
cl_uint atiIdx = -1;
|
||||
for(cl_uint i=0; i<nPlatforms; i++)
|
||||
{
|
||||
char buff[512];
|
||||
status = clGetPlatformInfo( pIdx[i], CL_PLATFORM_VENDOR, 512, buff, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
if( strcmp( buff, "NVIDIA Corporation" )==0 ) nvIdx = i;
|
||||
if( strcmp( buff, "Advanced Micro Devices, Inc." )==0 ) atiIdx = i;
|
||||
}
|
||||
|
||||
if( deviceType == CL_DEVICE_TYPE_GPU )
|
||||
{
|
||||
if( nvIdx != -1 ) platform = pIdx[nvIdx];
|
||||
else platform = pIdx[atiIdx];
|
||||
}
|
||||
else if( deviceType == CL_DEVICE_TYPE_CPU )
|
||||
{
|
||||
platform = pIdx[atiIdx];
|
||||
}
|
||||
}
|
||||
|
||||
cl_uint numDevice;
|
||||
status = clGetDeviceIDs( platform, deviceType, 0, NULL, &numDevice );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
return numDevice;
|
||||
}
|
||||
|
||||
void DeviceCL::getDeviceName( char nameOut[128] ) const
|
||||
{
|
||||
cl_int status;
|
||||
status = clGetDeviceInfo( m_deviceIdx, CL_DEVICE_NAME, sizeof(char)*128, nameOut, NULL );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
// Looks up the kernel funcName through the device's kernel manager, forwarding
// every argument unchanged to KernelManager::query().
Kernel* DeviceCL::getKernel(const char* fileName, const char* funcName, const char* option, const char* src, bool cacheKernel )const
{
	Kernel* kernel = m_kernelManager->query( this, fileName, funcName, option, src, cacheKernel );
	return kernel;
}
|
||||
|
||||
};
|
||||
@@ -0,0 +1,541 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
struct KernelCL : public Kernel
|
||||
{
|
||||
cl_kernel& getKernel() { return (cl_kernel&)m_kernel; }
|
||||
};
|
||||
|
||||
// Returns a pointer into name just past the last occurrence of pattern, or
// name itself when pattern does not occur. Used to drop leading directory
// components, e.g. strip("a/b/c.cl", "/") yields "c.cl".
// An empty pattern returns name unchanged.
static const char* strip(const char* name, const char* pattern)
{
	const size_t patlen = strlen(pattern);

	// strstr() matches an empty pattern at every position without advancing,
	// which made the original scan loop forever.
	if( patlen == 0 ) return name;

	const char* tail = name;
	const char* hit = strstr( tail, pattern );
	while( hit != 0 )
	{
		tail = hit + patlen;
		hit = strstr( tail, pattern );
	}
	return tail;
}
|
||||
|
||||
static bool isFileUpToDate(const char* binaryFileName,const char* srcFileName)
|
||||
|
||||
{
|
||||
bool fileUpToDate = false;
|
||||
|
||||
bool binaryFileValid=false;
|
||||
FILETIME modtimeBinary;
|
||||
|
||||
int nameLength = (int)strlen(binaryFileName)+1;
|
||||
#ifdef UNICODE
|
||||
WCHAR* fName = new WCHAR[nameLength];
|
||||
MultiByteToWideChar(CP_ACP,0,binaryFileName,-1, fName, nameLength);
|
||||
HANDLE binaryFileHandle = CreateFile(fName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
|
||||
delete [] fName;
|
||||
#else
|
||||
HANDLE binaryFileHandle = CreateFile(binaryFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
|
||||
#endif
|
||||
if (binaryFileHandle ==INVALID_HANDLE_VALUE)
|
||||
{
|
||||
DWORD errorCode;
|
||||
errorCode = GetLastError();
|
||||
switch (errorCode)
|
||||
{
|
||||
case ERROR_FILE_NOT_FOUND:
|
||||
{
|
||||
debugPrintf("\nCached file not found %s\n", binaryFileName);
|
||||
break;
|
||||
}
|
||||
case ERROR_PATH_NOT_FOUND:
|
||||
{
|
||||
debugPrintf("\nCached file path not found %s\n", binaryFileName);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
debugPrintf("\nFailed reading cached file with errorCode = %d\n", errorCode);
|
||||
}
|
||||
}
|
||||
} else
|
||||
{
|
||||
if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary)==0)
|
||||
{
|
||||
DWORD errorCode;
|
||||
errorCode = GetLastError();
|
||||
debugPrintf("\nGetFileTime errorCode = %d\n", errorCode);
|
||||
} else
|
||||
{
|
||||
binaryFileValid = true;
|
||||
}
|
||||
CloseHandle(binaryFileHandle);
|
||||
}
|
||||
|
||||
if (binaryFileValid)
|
||||
{
|
||||
#ifdef UNICODE
|
||||
int nameLength = (int)strlen(srcFileName)+1;
|
||||
WCHAR* fName = new WCHAR[nameLength];
|
||||
MultiByteToWideChar(CP_ACP,0,srcFileName,-1, fName, nameLength);
|
||||
HANDLE srcFileHandle = CreateFile(fName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
|
||||
delete [] fName;
|
||||
#else
|
||||
HANDLE srcFileHandle = CreateFile(srcFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
|
||||
#endif
|
||||
if (srcFileHandle!=INVALID_HANDLE_VALUE)
|
||||
{
|
||||
FILETIME modtimeSrc;
|
||||
if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc)==0)
|
||||
{
|
||||
DWORD errorCode;
|
||||
errorCode = GetLastError();
|
||||
debugPrintf("\nGetFileTime errorCode = %d\n", errorCode);
|
||||
}
|
||||
if ( ( modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime)
|
||||
||(( modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime)&&(modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime)))
|
||||
{
|
||||
fileUpToDate=true;
|
||||
} else
|
||||
{
|
||||
debugPrintf("\nCached binary file found (%s), but out-of-date\n",binaryFileName);
|
||||
}
|
||||
CloseHandle(srcFileHandle);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
DWORD errorCode;
|
||||
errorCode = GetLastError();
|
||||
switch (errorCode)
|
||||
{
|
||||
case ERROR_FILE_NOT_FOUND:
|
||||
{
|
||||
debugPrintf("\nSrc file not found %s\n", srcFileName);
|
||||
break;
|
||||
}
|
||||
case ERROR_PATH_NOT_FOUND:
|
||||
{
|
||||
debugPrintf("\nSrc path not found %s\n", srcFileName);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
debugPrintf("\nnSrc file reading errorCode = %d\n", errorCode);
|
||||
}
|
||||
}
|
||||
ADLASSERT(0);
|
||||
#else
|
||||
//if we cannot find the source, assume it is OK in release builds
|
||||
fileUpToDate = true;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return fileUpToDate;
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_CL>::setFromFile( const Device* deviceData, const char* fileName, const char* option, bool addExtension,
|
||||
bool cacheKernel)
|
||||
{
|
||||
m_deviceData = deviceData;
|
||||
|
||||
char fileNameWithExtension[256];
|
||||
|
||||
if( addExtension )
|
||||
sprintf_s( fileNameWithExtension, "%s.cl", fileName );
|
||||
else
|
||||
sprintf_s( fileNameWithExtension, "%s", fileName );
|
||||
|
||||
class File
|
||||
{
|
||||
public:
|
||||
__inline
|
||||
bool open(const char* fileNameWithExtension)
|
||||
{
|
||||
size_t size;
|
||||
char* str;
|
||||
|
||||
// Open file stream
|
||||
std::fstream f(fileNameWithExtension, (std::fstream::in | std::fstream::binary));
|
||||
|
||||
// Check if we have opened file stream
|
||||
if (f.is_open()) {
|
||||
size_t sizeFile;
|
||||
// Find the stream size
|
||||
f.seekg(0, std::fstream::end);
|
||||
size = sizeFile = (size_t)f.tellg();
|
||||
f.seekg(0, std::fstream::beg);
|
||||
|
||||
str = new char[size + 1];
|
||||
if (!str) {
|
||||
f.close();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Read file
|
||||
f.read(str, sizeFile);
|
||||
f.close();
|
||||
str[size] = '\0';
|
||||
|
||||
m_source = str;
|
||||
|
||||
delete[] str;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
const std::string& getSource() const {return m_source;}
|
||||
|
||||
private:
|
||||
std::string m_source;
|
||||
};
|
||||
|
||||
cl_program& program = (cl_program&)m_ptr;
|
||||
cl_int status = 0;
|
||||
|
||||
bool cacheBinary = cacheKernel;
|
||||
#if defined(ADL_CL_FORCE_UNCACHE_KERNEL)
|
||||
cacheBinary = false;
|
||||
#endif
|
||||
|
||||
char binaryFileName[512];
|
||||
{
|
||||
char deviceName[256];
|
||||
deviceData->getDeviceName(deviceName);
|
||||
char driverVersion[256];
|
||||
const DeviceCL* dd = (const DeviceCL*) deviceData;
|
||||
clGetDeviceInfo(dd->m_deviceIdx, CL_DRIVER_VERSION, 256, &driverVersion, NULL);
|
||||
const char* strippedFileName = strip(fileName,"\\");
|
||||
strippedFileName = strip(strippedFileName,"/");
|
||||
|
||||
sprintf_s(binaryFileName,"cache/%s.%s.%s.bin",strippedFileName, deviceName,driverVersion );
|
||||
}
|
||||
|
||||
bool upToDate = isFileUpToDate(binaryFileName,fileNameWithExtension);
|
||||
|
||||
if( cacheBinary && upToDate)
|
||||
{
|
||||
FILE* file = fopen(binaryFileName, "rb");
|
||||
|
||||
if( file )
|
||||
{
|
||||
fseek( file, 0L, SEEK_END );
|
||||
size_t binarySize = ftell( file );
|
||||
|
||||
rewind( file );
|
||||
char* binary = new char[binarySize];
|
||||
fread( binary, sizeof(char), binarySize, file );
|
||||
fclose( file );
|
||||
|
||||
if (binarySize)
|
||||
{
|
||||
const DeviceCL* dd = (const DeviceCL*) deviceData;
|
||||
program = clCreateProgramWithBinary( dd->m_context, 1, &dd->m_deviceIdx, &binarySize, (const unsigned char**)&binary, 0, &status );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, 0, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
if( status != CL_SUCCESS )
|
||||
{
|
||||
char *build_log;
|
||||
size_t ret_val_size;
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
|
||||
build_log = new char[ret_val_size+1];
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
|
||||
|
||||
build_log[ret_val_size] = '\0';
|
||||
|
||||
debugPrintf("%s\n", build_log);
|
||||
|
||||
delete build_log;
|
||||
ADLASSERT(0);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
if( !m_ptr )
|
||||
{
|
||||
File kernelFile;
|
||||
ADLASSERT( kernelFile.open( fileNameWithExtension ) );
|
||||
const char* source = kernelFile.getSource().c_str();
|
||||
setFromSrc( m_deviceData, source, option );
|
||||
|
||||
if( cacheBinary )
|
||||
{ // write to binary
|
||||
size_t binarySize;
|
||||
status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
char* binary = new char[binarySize];
|
||||
|
||||
status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
{
|
||||
FILE* file = fopen(binaryFileName, "wb");
|
||||
if (file)
|
||||
{
|
||||
fwrite( binary, sizeof(char), binarySize, file );
|
||||
fclose( file );
|
||||
}
|
||||
}
|
||||
|
||||
delete [] binary;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_CL>::setFromSrcCached( const Device* deviceData, const char* src, const char* fileName, const char* option )
|
||||
{
|
||||
m_deviceData = deviceData;
|
||||
|
||||
bool cacheBinary = true;
|
||||
cl_program& program = (cl_program&)m_ptr;
|
||||
cl_int status = 0;
|
||||
|
||||
char binaryFileName[512];
|
||||
{
|
||||
char deviceName[256];
|
||||
deviceData->getDeviceName(deviceName);
|
||||
char driverVersion[256];
|
||||
const DeviceCL* dd = (const DeviceCL*) deviceData;
|
||||
clGetDeviceInfo(dd->m_deviceIdx, CL_DRIVER_VERSION, 256, &driverVersion, NULL);
|
||||
|
||||
const char* strippedFileName = strip(fileName,"\\");
|
||||
strippedFileName = strip(strippedFileName,"/");
|
||||
|
||||
sprintf_s(binaryFileName,"cache/%s.%s.%s.bin",strippedFileName, deviceName,driverVersion );
|
||||
}
|
||||
|
||||
|
||||
char fileNameWithExtension[256];
|
||||
sprintf_s(fileNameWithExtension,"%s.cl",fileName, ".cl");
|
||||
|
||||
bool upToDate = isFileUpToDate(binaryFileName,fileNameWithExtension);
|
||||
|
||||
|
||||
if( cacheBinary )
|
||||
{
|
||||
|
||||
bool fileUpToDate = isFileUpToDate(binaryFileName,fileNameWithExtension);
|
||||
|
||||
if( fileUpToDate)
|
||||
{
|
||||
FILE* file = fopen(binaryFileName, "rb");
|
||||
if (file)
|
||||
{
|
||||
fseek( file, 0L, SEEK_END );
|
||||
size_t binarySize = ftell( file );
|
||||
rewind( file );
|
||||
char* binary = new char[binarySize];
|
||||
fread( binary, sizeof(char), binarySize, file );
|
||||
fclose( file );
|
||||
|
||||
const DeviceCL* dd = (const DeviceCL*) deviceData;
|
||||
program = clCreateProgramWithBinary( dd->m_context, 1, &dd->m_deviceIdx, &binarySize, (const unsigned char**)&binary, 0, &status );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, 0, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
if( status != CL_SUCCESS )
|
||||
{
|
||||
char *build_log;
|
||||
size_t ret_val_size;
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
|
||||
build_log = new char[ret_val_size+1];
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
|
||||
|
||||
build_log[ret_val_size] = '\0';
|
||||
|
||||
debugPrintf("%s\n", build_log);
|
||||
|
||||
delete build_log;
|
||||
ADLASSERT(0);
|
||||
}
|
||||
delete[] binary;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( !m_ptr )
|
||||
{
|
||||
|
||||
setFromSrc( deviceData, src, option );
|
||||
|
||||
if( cacheBinary )
|
||||
{ // write to binary
|
||||
cl_uint numAssociatedDevices;
|
||||
status = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
if (numAssociatedDevices==1)
|
||||
{
|
||||
|
||||
|
||||
size_t binarySize;
|
||||
status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
char* binary = new char[binarySize];
|
||||
|
||||
status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
{
|
||||
FILE* file = fopen(binaryFileName, "wb");
|
||||
if (file)
|
||||
{
|
||||
fwrite( binary, sizeof(char), binarySize, file );
|
||||
fclose( file );
|
||||
}
|
||||
}
|
||||
|
||||
delete [] binary;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_CL>::setFromSrc( const Device* deviceData, const char* src, const char* option )
|
||||
{
|
||||
ADLASSERT( deviceData->m_type == TYPE_CL );
|
||||
m_deviceData = deviceData;
|
||||
const DeviceCL* dd = (const DeviceCL*) deviceData;
|
||||
|
||||
cl_program& program = (cl_program&)m_ptr;
|
||||
cl_int status = 0;
|
||||
size_t srcSize[] = {strlen( src )};
|
||||
program = clCreateProgramWithSource( dd->m_context, 1, &src, srcSize, &status );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, NULL, NULL );
|
||||
if( status != CL_SUCCESS )
|
||||
{
|
||||
char *build_log;
|
||||
size_t ret_val_size;
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
|
||||
build_log = new char[ret_val_size+1];
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
|
||||
|
||||
build_log[ret_val_size] = '\0';
|
||||
|
||||
debugPrintf("%s\n", build_log);
|
||||
printf("%s\n", build_log);
|
||||
|
||||
ADLASSERT(0);
|
||||
delete build_log;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
KernelBuilder<TYPE_CL>::~KernelBuilder()
|
||||
{
|
||||
cl_program program = (cl_program)m_ptr;
|
||||
clReleaseProgram( program );
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_CL>::createKernel( const char* funcName, Kernel& kernelOut )
|
||||
{
|
||||
KernelCL* clKernel = (KernelCL*)&kernelOut;
|
||||
|
||||
cl_program program = (cl_program)m_ptr;
|
||||
cl_int status = 0;
|
||||
clKernel->getKernel() = clCreateKernel(program, funcName, &status );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
kernelOut.m_type = TYPE_CL;
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_CL>::deleteKernel( Kernel& kernel )
|
||||
{
|
||||
KernelCL* clKernel = (KernelCL*)&kernel;
|
||||
clReleaseKernel( clKernel->getKernel() );
|
||||
}
|
||||
|
||||
|
||||
|
||||
class LauncherCL
|
||||
{
|
||||
public:
|
||||
typedef Launcher::BufferInfo BufferInfo;
|
||||
|
||||
__inline
|
||||
static void setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n );
|
||||
template<typename T>
|
||||
__inline
|
||||
static void setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts );
|
||||
__inline
|
||||
static void launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY );
|
||||
};
|
||||
|
||||
// Binds the n buffers in buffInfo as consecutive kernel arguments, starting at
// the launcher's current argument index (launcher->m_idx is advanced by one
// per buffer).
void LauncherCL::setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n )
{
	KernelCL* clKernel = (KernelCL*)launcher->m_kernel;

	int bufferIdx = 0;
	while( bufferIdx < n )
	{
		// only the m_ptr handle is used, so the element type of the cast is irrelevant
		Buffer<int>* buff = (Buffer<int>*)buffInfo[bufferIdx].m_buffer;

		const cl_int status = clSetKernelArg( clKernel->getKernel(), launcher->m_idx++, sizeof(cl_mem), &buff->m_ptr );
		ADLASSERT( status == CL_SUCCESS );

		++bufferIdx;
	}
}
|
||||
|
||||
template<typename T>
|
||||
void LauncherCL::setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts )
|
||||
{
|
||||
KernelCL* clKernel = (KernelCL*)launcher->m_kernel;
|
||||
int sz=sizeof(T);
|
||||
cl_int status = clSetKernelArg( clKernel->getKernel(), launcher->m_idx++, sz, &consts );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
// Enqueues the launcher's kernel over a 2D range. In each dimension the global
// size is the requested thread count rounded up to a whole number of
// work-groups of the given local size, with at least one work-group.
void LauncherCL::launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY )
{
	KernelCL* clKernel = (KernelCL*)launcher->m_kernel;
	const DeviceCL* ddcl = (const DeviceCL*)launcher->m_deviceData;

	const size_t requested[2] = { (size_t)numThreadsX, (size_t)numThreadsY };
	size_t lRange[3] = { (size_t)localSizeX, (size_t)localSizeY, 1 };
	size_t gRange[3] = { 1, 1, 1 };

	for( int dim=0; dim<2; dim++ )
	{
		// number of work-groups = ceil( requested / local ), but never zero
		size_t numGroups = requested[dim]/lRange[dim];
		if( requested[dim]%lRange[dim] != 0 ) numGroups += 1;
		if( numGroups < 1 ) numGroups = 1;

		gRange[dim] = numGroups*lRange[dim];
	}

	const cl_int status = clEnqueueNDRangeKernel( ddcl->m_commandQueue,
		clKernel->getKernel(), 2, NULL, gRange, lRange, 0, 0, 0 );
	ADLASSERT( status == CL_SUCCESS );
}
|
||||
|
||||
|
||||
};
|
||||
@@ -0,0 +1,512 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#include <windows.h>
|
||||
#include <d3d11.h>
|
||||
#include <d3dx11.h>
|
||||
#include <d3dcompiler.h>
|
||||
#include <DXGI.h>
|
||||
#pragma comment(lib,"d3dx11.lib")
|
||||
#pragma comment(lib,"d3d11.lib")
|
||||
#pragma comment(lib,"DXGI.lib")
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
// u32: textual alias for unsigned int. Being a macro (not a typedef) it is not
// scoped by the adl namespace and stays defined for all code that follows.
#define u32 unsigned int
|
||||
|
||||
struct DeviceDX11 : public Device
|
||||
{
|
||||
typedef DeviceUtils::Config Config;
|
||||
|
||||
|
||||
__inline
|
||||
DeviceDX11() : Device( TYPE_DX11 ), m_kernelManager(0){}
|
||||
__inline
|
||||
void* getContext() const { return m_context; }
|
||||
__inline
|
||||
void initialize(const Config& cfg);
|
||||
__inline
|
||||
void release();
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void deallocate(Buffer<T>* buf);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const Buffer<T>* src, int nElems);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems = 0);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems = 0);
|
||||
|
||||
__inline
|
||||
void waitForCompletion() const;
|
||||
|
||||
__inline
|
||||
void getDeviceName( char nameOut[128] ) const;
|
||||
|
||||
__inline
|
||||
static
|
||||
int getNDevices();
|
||||
|
||||
__inline
|
||||
Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true )const;
|
||||
|
||||
|
||||
ID3D11DeviceContext* m_context;
|
||||
ID3D11Device* m_device;
|
||||
IDXGISwapChain* m_swapChain;
|
||||
|
||||
KernelManager* m_kernelManager;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct BufferDX11 : public Buffer<T>
|
||||
{
|
||||
ID3D11Buffer* getBuffer() { return (ID3D11Buffer*)m_ptr; }
|
||||
ID3D11UnorderedAccessView* getUAV() { return (ID3D11UnorderedAccessView*)m_uav; }
|
||||
ID3D11ShaderResourceView* getSRV() { return (ID3D11ShaderResourceView*)m_srv; }
|
||||
|
||||
ID3D11Buffer** getBufferPtr() { return (ID3D11Buffer**)&m_ptr; }
|
||||
ID3D11UnorderedAccessView** getUAVPtr() { return (ID3D11UnorderedAccessView**)&m_uav; }
|
||||
ID3D11ShaderResourceView** getSRVPtr() { return (ID3D11ShaderResourceView**)&m_srv; }
|
||||
};
|
||||
|
||||
// Calls Release() on the COM pointer p if it is non-null and then resets p to
// NULL. Note that p is evaluated more than once and that the expansion is a
// bare { } block, so it must not be used as the body of an if that has an else.
#define SAFE_RELEASE(p)      { if(p) { (p)->Release(); (p)=NULL; } }
|
||||
|
||||
|
||||
void DeviceDX11::initialize(const Config& cfg)
|
||||
{
|
||||
DeviceDX11* deviceData = this;
|
||||
|
||||
HRESULT hr = S_OK;
|
||||
UINT createDeviceFlg = 0;
|
||||
#ifdef _DEBUG
|
||||
createDeviceFlg |= D3D11_CREATE_DEVICE_DEBUG;
|
||||
#endif
|
||||
D3D_FEATURE_LEVEL fl[] = {
|
||||
D3D_FEATURE_LEVEL_11_0,
|
||||
D3D_FEATURE_LEVEL_10_1,
|
||||
D3D_FEATURE_LEVEL_10_0
|
||||
};
|
||||
|
||||
typedef HRESULT (WINAPI * LPD3D11CREATEDEVICE)( IDXGIAdapter*, D3D_DRIVER_TYPE, HMODULE, u32, D3D_FEATURE_LEVEL*, UINT, u32, ID3D11Device**, D3D_FEATURE_LEVEL*, ID3D11DeviceContext** );
|
||||
|
||||
HMODULE moduleD3D11 = 0;
|
||||
#ifdef UNICODE
|
||||
moduleD3D11 = LoadLibrary( L"d3d11.dll" );
|
||||
#else
|
||||
moduleD3D11 = LoadLibrary( "d3d11.dll" );
|
||||
#endif
|
||||
ADLASSERT( moduleD3D11 );
|
||||
|
||||
LPD3D11CREATEDEVICE _DynamicD3D11CreateDevice;
|
||||
_DynamicD3D11CreateDevice = ( LPD3D11CREATEDEVICE )GetProcAddress( moduleD3D11, "D3D11CreateDevice" );
|
||||
|
||||
D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_HARDWARE;
|
||||
// http://msdn.microsoft.com/en-us/library/ff476082(v=VS.85).aspx
|
||||
// If you set the pAdapter parameter to a non-NULL value, you must also set the DriverType parameter to the D3D_DRIVER_TYPE_UNKNOWN value. If you set the pAdapter parameter to a non-NULL value and the DriverType parameter to the D3D_DRIVER_TYPE_HARDWARE value, D3D11CreateDevice returns an HRESULT of E_INVALIDARG.
|
||||
type = D3D_DRIVER_TYPE_UNKNOWN;
|
||||
/*
|
||||
// Create a hardware Direct3D 11 device
|
||||
hr = _DynamicD3D11CreateDevice( NULL,
|
||||
type, NULL, createDeviceFlg,
|
||||
fl, _countof(fl), D3D11_SDK_VERSION, &deviceData->m_device, NULL, &deviceData->m_context );
|
||||
*/
|
||||
IDXGIAdapter* adapter = NULL;
|
||||
{// get adapter of the index
|
||||
IDXGIFactory* factory = NULL;
|
||||
int targetAdapterIdx = cfg.m_deviceIdx;//min( cfg.m_deviceIdx, getNDevices()-1 );
|
||||
CreateDXGIFactory( __uuidof(IDXGIFactory), (void**)&factory );
|
||||
|
||||
u32 i = 0;
|
||||
while( factory->EnumAdapters( i, &adapter ) != DXGI_ERROR_NOT_FOUND )
|
||||
{
|
||||
if( i== targetAdapterIdx ) break;
|
||||
i++;
|
||||
}
|
||||
factory->Release();
|
||||
}
|
||||
|
||||
// Create a hardware Direct3D 11 device
|
||||
hr = D3D11CreateDevice( adapter,
|
||||
type,
|
||||
NULL, createDeviceFlg,
|
||||
fl, _countof(fl), D3D11_SDK_VERSION, &deviceData->m_device, NULL, &deviceData->m_context );
|
||||
|
||||
ADLASSERT( hr == S_OK );
|
||||
|
||||
// Check if the hardware device supports Compute Shader 4.0
|
||||
D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS hwopts;
|
||||
deviceData->m_device->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &hwopts, sizeof(hwopts));
|
||||
|
||||
if( !hwopts.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x )
|
||||
{
|
||||
SAFE_RELEASE( deviceData->m_context );
|
||||
SAFE_RELEASE( deviceData->m_device );
|
||||
|
||||
debugPrintf("DX11 GPU is not present\n");
|
||||
ADLASSERT( 0 );
|
||||
}
|
||||
|
||||
m_kernelManager = new KernelManager;
|
||||
}
|
||||
|
||||
void DeviceDX11::release()
|
||||
{
|
||||
SAFE_RELEASE( m_context );
|
||||
SAFE_RELEASE( m_device );
|
||||
|
||||
if( m_kernelManager ) delete m_kernelManager;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceDX11::allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type)
|
||||
{
|
||||
ADLASSERT( type != BufferBase::BUFFER_ZERO_COPY );
|
||||
|
||||
DeviceDX11* deviceData = this;
|
||||
buf->m_device = deviceData;
|
||||
buf->m_size = nElems;
|
||||
BufferDX11<T>* dBuf = (BufferDX11<T>*)buf;
|
||||
|
||||
// if( type & BufferBase::BUFFER )
|
||||
{
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
if( type == BufferBase::BUFFER_CONST )
|
||||
{
|
||||
ADLASSERT( nElems == 1 );
|
||||
D3D11_BUFFER_DESC constant_buffer_desc;
|
||||
ZeroMemory( &constant_buffer_desc, sizeof(constant_buffer_desc) );
|
||||
// constant_buffer_desc.ByteWidth = NEXTMULTIPLEOF( sizeof(T), 16 );
|
||||
constant_buffer_desc.ByteWidth = (((sizeof(T))/(16) + (((sizeof(T))%(16)==0)?0:1))*(16));
|
||||
// constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
// constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
|
||||
// constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
constant_buffer_desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
|
||||
constant_buffer_desc.CPUAccessFlags = 0;
|
||||
|
||||
hr = deviceData->m_device->CreateBuffer( &constant_buffer_desc, NULL, dBuf->getBufferPtr() );
|
||||
ADLASSERT( hr == S_OK );
|
||||
return;
|
||||
}
|
||||
|
||||
D3D11_BUFFER_DESC buffer_desc;
|
||||
ZeroMemory(&buffer_desc, sizeof(buffer_desc));
|
||||
buffer_desc.ByteWidth = nElems * sizeof(T);
|
||||
|
||||
if( type != BufferBase::BUFFER_RAW )
|
||||
{
|
||||
buffer_desc.StructureByteStride = sizeof(T);
|
||||
// buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
||||
}
|
||||
|
||||
if( type == BufferBase::BUFFER_STAGING )
|
||||
{
|
||||
buffer_desc.Usage = D3D11_USAGE_STAGING;
|
||||
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
||||
}
|
||||
else if( type == BufferBase::BUFFER_INDEX )
|
||||
{
|
||||
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
buffer_desc.BindFlags = D3D11_BIND_INDEX_BUFFER;
|
||||
}
|
||||
else if( type == BufferBase::BUFFER_VERTEX )
|
||||
{
|
||||
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
|
||||
buffer_desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
|
||||
buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
|
||||
// check this
|
||||
if(type == BufferBase::BUFFER_RAW)
|
||||
{
|
||||
// buffer_desc.BindFlags |= D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER;
|
||||
buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS | D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; // need this to be used for DispatchIndirect
|
||||
}
|
||||
}
|
||||
hr = deviceData->m_device->CreateBuffer(&buffer_desc, NULL, dBuf->getBufferPtr());
|
||||
|
||||
ADLASSERT( hr == S_OK );
|
||||
|
||||
if( type == BufferBase::BUFFER_INDEX ) return;
|
||||
|
||||
if( type == BufferBase::BUFFER ||
|
||||
type == BufferBase::BUFFER_RAW ||
|
||||
type == BufferBase::BUFFER_W_COUNTER )
|
||||
{
|
||||
// Create UAVs for all CS buffers
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC uavbuffer_desc;
|
||||
ZeroMemory(&uavbuffer_desc, sizeof(uavbuffer_desc));
|
||||
uavbuffer_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
||||
|
||||
if( type == BufferBase::BUFFER_RAW )
|
||||
{
|
||||
uavbuffer_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uavbuffer_desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
|
||||
uavbuffer_desc.Buffer.NumElements = buffer_desc.ByteWidth / 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
uavbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
uavbuffer_desc.Buffer.NumElements = nElems;
|
||||
}
|
||||
|
||||
if( type == BufferBase::BUFFER_W_COUNTER )
|
||||
{
|
||||
uavbuffer_desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_COUNTER;
|
||||
}
|
||||
|
||||
hr = deviceData->m_device->CreateUnorderedAccessView(dBuf->getBuffer(), &uavbuffer_desc, dBuf->getUAVPtr());
|
||||
ADLASSERT( hr == S_OK );
|
||||
|
||||
// Create SRVs for all CS buffers
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc;
|
||||
ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc));
|
||||
if( type == BufferBase::BUFFER_RAW )
|
||||
{
|
||||
ADLASSERT( sizeof(T) <= 16 );
|
||||
srvbuffer_desc.Format = DXGI_FORMAT_R32_UINT;
|
||||
srvbuffer_desc.Buffer.ElementWidth = nElems;
|
||||
// if ( buffer_desc.MiscFlags & D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS )
|
||||
// {
|
||||
// srvbuffer_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
// srvbuffer_desc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW;
|
||||
// srvbuffer_desc.BufferEx.NumElements = buffer_desc.ByteWidth / 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
srvbuffer_desc.Buffer.ElementWidth = nElems;
|
||||
}
|
||||
srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
|
||||
hr = deviceData->m_device->CreateShaderResourceView(dBuf->getBuffer(), &srvbuffer_desc, dBuf->getSRVPtr());
|
||||
ADLASSERT( hr == S_OK );
|
||||
}
|
||||
else if( type == BufferBase::BUFFER_APPEND )
|
||||
{
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC desc;
|
||||
ZeroMemory( &desc, sizeof(desc) );
|
||||
desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
||||
desc.Buffer.FirstElement = 0;
|
||||
|
||||
desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_APPEND;
|
||||
|
||||
desc.Format = DXGI_FORMAT_UNKNOWN; // Format must be must be DXGI_FORMAT_UNKNOWN, when creating a View of a Structured Buffer
|
||||
desc.Buffer.NumElements = buffer_desc.ByteWidth / buffer_desc.StructureByteStride;
|
||||
|
||||
hr = deviceData->m_device->CreateUnorderedAccessView( dBuf->getBuffer(), &desc, dBuf->getUAVPtr() );
|
||||
ADLASSERT( hr == S_OK );
|
||||
}
|
||||
}
|
||||
// else
|
||||
// {
|
||||
// ADLASSERT(0);
|
||||
// }
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceDX11::deallocate(Buffer<T>* buf)
|
||||
{
|
||||
BufferDX11<T>* dBuf = (BufferDX11<T>*)buf;
|
||||
|
||||
if( dBuf->getBuffer() )
|
||||
{
|
||||
dBuf->getBuffer()->Release();
|
||||
dBuf->m_ptr = NULL;
|
||||
}
|
||||
if( dBuf->getUAV() )
|
||||
{
|
||||
dBuf->getUAV()->Release();
|
||||
dBuf->m_uav = NULL;
|
||||
}
|
||||
if( dBuf->getSRV() )
|
||||
{
|
||||
dBuf->getSRV()->Release();
|
||||
dBuf->m_srv = NULL;
|
||||
}
|
||||
buf->m_device = 0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceDX11::copy(Buffer<T>* dst, const Buffer<T>* src, int nElems)
|
||||
{
|
||||
if( dst->m_device->m_type == TYPE_DX11 || src->m_device->m_type == TYPE_DX11 )
|
||||
{
|
||||
DeviceDX11* deviceData = this;
|
||||
BufferDX11<T>* dDst = (BufferDX11<T>*)dst;
|
||||
BufferDX11<T>* dSrc = (BufferDX11<T>*)src;
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE MappedVelResource = {0};
|
||||
|
||||
D3D11_BOX destRegion;
|
||||
destRegion.left = 0*sizeof(T);
|
||||
destRegion.front = 0;
|
||||
destRegion.top = 0;
|
||||
destRegion.bottom = 1;
|
||||
destRegion.back = 1;
|
||||
destRegion.right = (0+nElems)*sizeof(T);
|
||||
|
||||
deviceData->m_context->CopySubresourceRegion(
|
||||
dDst->getBuffer(),
|
||||
0, 0, 0, 0,
|
||||
dSrc->getBuffer(),
|
||||
0,
|
||||
&destRegion );
|
||||
|
||||
}
|
||||
else if( src->m_device->m_type == TYPE_HOST )
|
||||
{
|
||||
ADLASSERT( dst->getType() == TYPE_DX11 );
|
||||
dst->write( src->m_ptr, nElems );
|
||||
}
|
||||
else if( dst->m_device->m_type == TYPE_HOST )
|
||||
{
|
||||
ADLASSERT( src->getType() == TYPE_DX11 );
|
||||
src->read( dst->m_ptr, nElems );
|
||||
}
|
||||
else
|
||||
{
|
||||
ADLASSERT( 0 );
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceDX11::copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems)
|
||||
{
|
||||
DeviceDX11* deviceData = this;
|
||||
BufferDX11<T>* dSrc = (BufferDX11<T>*)src;
|
||||
Buffer<T> sBuf( deviceData, nElems, BufferBase::BUFFER_STAGING );
|
||||
BufferDX11<T>* dStagingBuf = (BufferDX11<T>*)&sBuf;
|
||||
|
||||
|
||||
ID3D11Buffer *StagingBuffer = dStagingBuf->getBuffer();
|
||||
D3D11_MAPPED_SUBRESOURCE MappedVelResource = {0};
|
||||
|
||||
D3D11_BOX destRegion;
|
||||
destRegion.left = srcOffsetNElems*sizeof(T);
|
||||
destRegion.front = 0;
|
||||
destRegion.top = 0;
|
||||
destRegion.bottom = 1;
|
||||
destRegion.back = 1;
|
||||
destRegion.right = (srcOffsetNElems+nElems)*sizeof(T);
|
||||
|
||||
deviceData->m_context->CopySubresourceRegion(
|
||||
StagingBuffer,
|
||||
0, 0, 0, 0,
|
||||
dSrc->getBuffer(),
|
||||
0,
|
||||
&destRegion);
|
||||
|
||||
deviceData->m_context->Map(StagingBuffer, 0, D3D11_MAP_READ, 0, &MappedVelResource);
|
||||
memcpy(dst, MappedVelResource.pData, nElems*sizeof(T));
|
||||
deviceData->m_context->Unmap(StagingBuffer, 0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceDX11::copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems)
|
||||
{
|
||||
BufferDX11<T>* dBuf = (BufferDX11<T>*)dst;
|
||||
|
||||
DeviceDX11* deviceData = this;
|
||||
|
||||
D3D11_BOX destRegion;
|
||||
destRegion.left = dstOffsetNElems*sizeof(T);
|
||||
destRegion.front = 0;
|
||||
destRegion.top = 0;
|
||||
destRegion.bottom = 1;
|
||||
destRegion.back = 1;
|
||||
destRegion.right = (dstOffsetNElems+nElems)*sizeof(T);
|
||||
deviceData->m_context->UpdateSubresource(dBuf->getBuffer(), 0, &destRegion, src, 0, 0);
|
||||
}
|
||||
|
||||
void DeviceDX11::waitForCompletion() const
|
||||
{
|
||||
const DeviceDX11* deviceData = this;
|
||||
|
||||
ID3D11Query* syncQuery;
|
||||
D3D11_QUERY_DESC qDesc;
|
||||
qDesc.Query = D3D11_QUERY_EVENT;
|
||||
qDesc.MiscFlags = 0;
|
||||
deviceData->m_device->CreateQuery( &qDesc, &syncQuery );
|
||||
deviceData->m_context->End( syncQuery );
|
||||
while( deviceData->m_context->GetData( syncQuery, 0,0,0 ) == S_FALSE ){}
|
||||
syncQuery->Release();
|
||||
}
|
||||
|
||||
int DeviceDX11::getNDevices()
|
||||
{
|
||||
IDXGIFactory1* factory = NULL;
|
||||
IDXGIAdapter1* adapter = NULL;
|
||||
CreateDXGIFactory1( __uuidof(IDXGIFactory1), (void**)&factory );
|
||||
|
||||
u32 i = 0;
|
||||
while( factory->EnumAdapters1( i, &adapter ) != DXGI_ERROR_NOT_FOUND )
|
||||
{
|
||||
i++;
|
||||
}
|
||||
|
||||
factory->Release();
|
||||
return i;
|
||||
}
|
||||
|
||||
void DeviceDX11::getDeviceName( char nameOut[128] ) const
|
||||
{
|
||||
IDXGIAdapter* adapter;// = getAdapterFromDevice( this );
|
||||
{
|
||||
IDXGIDevice* pDXGIDevice;
|
||||
|
||||
ADLASSERT( m_device->QueryInterface(__uuidof(IDXGIDevice), (void **)&pDXGIDevice) == S_OK );
|
||||
ADLASSERT( pDXGIDevice->GetParent(__uuidof(IDXGIAdapter), (void **)&adapter) == S_OK );
|
||||
|
||||
pDXGIDevice->Release();
|
||||
}
|
||||
DXGI_ADAPTER_DESC adapterDesc;
|
||||
adapter->GetDesc( &adapterDesc );
|
||||
|
||||
// wcstombs( nameOut, adapterDesc.Description, 128 );
|
||||
size_t i;
|
||||
wcstombs_s( &i, nameOut, 128, adapterDesc.Description, 128 );
|
||||
}
|
||||
|
||||
// Looks up a kernel for this device by forwarding all arguments to the
// kernel manager's query().
Kernel* DeviceDX11::getKernel(const char* fileName, const char* funcName, const char* option, const char* src, bool cacheKernel ) const
{
	Kernel* kernel = m_kernelManager->query( this, fileName, funcName, option, src, cacheKernel );
	return kernel;
}
|
||||
|
||||
#undef u32
|
||||
|
||||
#undef SAFE_RELEASE
|
||||
|
||||
};
|
||||
@@ -0,0 +1,348 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
|
||||
|
||||
struct KernelDX11 : public Kernel
|
||||
{
|
||||
ID3D11ComputeShader* getKernel() { return (ID3D11ComputeShader*)m_kernel; }
|
||||
ID3D11ComputeShader** getKernelPtr() { return (ID3D11ComputeShader**)&m_kernel; }
|
||||
};
|
||||
|
||||
|
||||
__inline
|
||||
#ifdef UNICODE
|
||||
HRESULT FindDXSDKShaderFileCch( __in_ecount(cchDest) WCHAR* strDestPath,
|
||||
int cchDest,
|
||||
__in LPCWSTR strFilename )
|
||||
#else
|
||||
HRESULT FindDXSDKShaderFileCch( __in_ecount(cchDest) CHAR* strDestPath,
|
||||
int cchDest,
|
||||
__in LPCSTR strFilename )
|
||||
#endif
|
||||
{
|
||||
if( NULL == strFilename || strFilename[0] == 0 || NULL == strDestPath || cchDest < 10 )
|
||||
return E_INVALIDARG;
|
||||
|
||||
// Get the exe name, and exe path
|
||||
#ifdef UNICODE
|
||||
WCHAR strExePath[MAX_PATH] =
|
||||
#else
|
||||
CHAR strExePath[MAX_PATH] =
|
||||
#endif
|
||||
{
|
||||
0
|
||||
};
|
||||
#ifdef UNICODE
|
||||
WCHAR strExeName[MAX_PATH] =
|
||||
#else
|
||||
CHAR strExeName[MAX_PATH] =
|
||||
#endif
|
||||
{
|
||||
0
|
||||
};
|
||||
#ifdef UNICODE
|
||||
WCHAR* strLastSlash = NULL;
|
||||
#else
|
||||
CHAR* strLastSlash = NULL;
|
||||
#endif
|
||||
GetModuleFileName( NULL, strExePath, MAX_PATH );
|
||||
strExePath[MAX_PATH - 1] = 0;
|
||||
#ifdef UNICODE
|
||||
strLastSlash = wcsrchr( strExePath, TEXT( '\\' ) );
|
||||
#else
|
||||
strLastSlash = strrchr( strExePath, TEXT( '\\' ) );
|
||||
#endif
|
||||
if( strLastSlash )
|
||||
{
|
||||
#ifdef UNICODE
|
||||
wcscpy_s( strExeName, MAX_PATH, &strLastSlash[1] );
|
||||
#else
|
||||
|
||||
#endif
|
||||
// Chop the exe name from the exe path
|
||||
*strLastSlash = 0;
|
||||
|
||||
// Chop the .exe from the exe name
|
||||
#ifdef UNICODE
|
||||
strLastSlash = wcsrchr( strExeName, TEXT( '.' ) );
|
||||
#else
|
||||
strLastSlash = strrchr( strExeName, TEXT( '.' ) );
|
||||
#endif
|
||||
if( strLastSlash )
|
||||
*strLastSlash = 0;
|
||||
}
|
||||
|
||||
// Search in directories:
|
||||
// .\
|
||||
// %EXE_DIR%\..\..\%EXE_NAME%
|
||||
#ifdef UNICODE
|
||||
wcscpy_s( strDestPath, cchDest, strFilename );
|
||||
#else
|
||||
strcpy_s( strDestPath, cchDest, strFilename );
|
||||
#endif
|
||||
if( GetFileAttributes( strDestPath ) != 0xFFFFFFFF )
|
||||
return S_OK;
|
||||
|
||||
// swprintf_s( strDestPath, cchDest, L"%s\\..\\..\\%s\\%s", strExePath, strExeName, strFilename );
|
||||
#ifdef UNICODE
|
||||
swprintf_s( strDestPath, cchDest, L"%s\\..\\%s\\%s", strExePath, strExeName, strFilename );
|
||||
#else
|
||||
sprintf_s( strDestPath, cchDest, "%s\\..\\%s\\%s", strExePath, strExeName, strFilename );
|
||||
#endif
|
||||
if( GetFileAttributes( strDestPath ) != 0xFFFFFFFF )
|
||||
return S_OK;
|
||||
|
||||
// On failure, return the file as the path but also return an error code
|
||||
#ifdef UNICODE
|
||||
wcscpy_s( strDestPath, cchDest, strFilename );
|
||||
#else
|
||||
strcpy_s( strDestPath, cchDest, strFilename );
|
||||
#endif
|
||||
|
||||
ADLASSERT( 0 );
|
||||
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_DX11>::setFromFile( const Device* deviceData, const char* fileName, const char* option, bool addExtension,
|
||||
bool cacheKernel)
|
||||
{
|
||||
char fileNameWithExtension[256];
|
||||
|
||||
if( addExtension )
|
||||
sprintf_s( fileNameWithExtension, "%s.hlsl", fileName );
|
||||
else
|
||||
sprintf_s( fileNameWithExtension, "%s", fileName );
|
||||
|
||||
m_deviceData = deviceData;
|
||||
|
||||
int nameLength = (int)strlen(fileNameWithExtension)+1;
|
||||
#ifdef UNICODE
|
||||
WCHAR* wfileNameWithExtension = new WCHAR[nameLength];
|
||||
#else
|
||||
CHAR* wfileNameWithExtension = new CHAR[nameLength];
|
||||
#endif
|
||||
memset(wfileNameWithExtension,0,nameLength);
|
||||
#ifdef UNICODE
|
||||
MultiByteToWideChar(CP_ACP,0,fileNameWithExtension,-1, wfileNameWithExtension, nameLength);
|
||||
#else
|
||||
sprintf_s(wfileNameWithExtension, nameLength, "%s", fileNameWithExtension);
|
||||
#endif
|
||||
// swprintf_s(wfileNameWithExtension, nameLength*2, L"%s", fileNameWithExtension);
|
||||
|
||||
HRESULT hr;
|
||||
|
||||
// Finds the correct path for the shader file.
|
||||
// This is only required for this sample to be run correctly from within the Sample Browser,
|
||||
// in your own projects, these lines could be removed safely
|
||||
hr = FindDXSDKShaderFileCch( m_path, MAX_PATH, wfileNameWithExtension );
|
||||
|
||||
delete [] wfileNameWithExtension;
|
||||
|
||||
ADLASSERT( hr == S_OK );
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_DX11>::setFromSrc( const Device* deviceData, const char* src, const char* option )
|
||||
{
|
||||
m_deviceData = deviceData;
|
||||
m_ptr = (void*)src;
|
||||
m_path[0] = '0';
|
||||
}
|
||||
|
||||
template<>
|
||||
KernelBuilder<TYPE_DX11>::~KernelBuilder()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_DX11>::createKernel( const char* funcName, Kernel& kernelOut )
|
||||
{
|
||||
const DeviceDX11* deviceData = (const DeviceDX11*)m_deviceData;
|
||||
KernelDX11* dxKernel = (KernelDX11*)&kernelOut;
|
||||
HRESULT hr;
|
||||
|
||||
DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
|
||||
#if defined( DEBUG ) || defined( _DEBUG )
|
||||
// Set the D3DCOMPILE_DEBUG flag to embed debug information in the shaders.
|
||||
// Setting this flag improves the shader debugging experience, but still allows
|
||||
// the shaders to be optimized and to run exactly the way they will run in
|
||||
// the release configuration of this program.
|
||||
dwShaderFlags |= D3DCOMPILE_DEBUG;
|
||||
#endif
|
||||
|
||||
const D3D_SHADER_MACRO defines[] =
|
||||
{
|
||||
#ifdef USE_STRUCTURED_BUFFERS
|
||||
"USE_STRUCTURED_BUFFERS", "1",
|
||||
#endif
|
||||
|
||||
#ifdef TEST_DOUBLE
|
||||
"TEST_DOUBLE", "1",
|
||||
#endif
|
||||
NULL, NULL
|
||||
};
|
||||
|
||||
// We generally prefer to use the higher CS shader profile when possible as CS 5.0 is better performance on 11-class hardware
|
||||
LPCSTR pProfile = ( deviceData->m_device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ) ? "cs_5_0" : "cs_4_0";
|
||||
|
||||
ID3DBlob* pErrorBlob = NULL;
|
||||
ID3DBlob* pBlob = NULL;
|
||||
if( m_path[0] == '0' )
|
||||
{
|
||||
char* src = (char*)m_ptr;
|
||||
hr = D3DX11CompileFromMemory( src, strlen(src), 0, defines, NULL, funcName, pProfile,
|
||||
dwShaderFlags, NULL, NULL, &pBlob, &pErrorBlob, NULL );
|
||||
}
|
||||
else
|
||||
{
|
||||
hr = D3DX11CompileFromFile( m_path, defines, NULL, funcName, pProfile,
|
||||
dwShaderFlags, NULL, NULL, &pBlob, &pErrorBlob, NULL );
|
||||
}
|
||||
|
||||
if ( FAILED(hr) )
|
||||
{
|
||||
debugPrintf("%s", (char*)pErrorBlob->GetBufferPointer());
|
||||
}
|
||||
ADLASSERT( hr == S_OK );
|
||||
|
||||
hr = deviceData->m_device->CreateComputeShader( pBlob->GetBufferPointer(), pBlob->GetBufferSize(), NULL,
|
||||
dxKernel->getKernelPtr() );
|
||||
|
||||
#if defined(DEBUG) || defined(PROFILE)
|
||||
if ( kernelOut.m_kernel )
|
||||
kernelOut.m_kernel->SetPrivateData( WKPDID_D3DDebugObjectName, lstrlenA(pFunctionName), pFunctionName );
|
||||
#endif
|
||||
|
||||
SAFE_RELEASE( pErrorBlob );
|
||||
SAFE_RELEASE( pBlob );
|
||||
|
||||
kernelOut.m_type = TYPE_DX11;
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_DX11>::deleteKernel( Kernel& kernel )
|
||||
{
|
||||
KernelDX11* dxKernel = (KernelDX11*)&kernel;
|
||||
|
||||
if( kernel.m_kernel )
|
||||
{
|
||||
dxKernel->getKernel()->Release();
|
||||
kernel.m_kernel = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
class LauncherDX11
|
||||
{
|
||||
public:
|
||||
typedef Launcher::BufferInfo BufferInfo;
|
||||
|
||||
__inline
|
||||
static void setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n );
|
||||
template<typename T>
|
||||
__inline
|
||||
static void setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts );
|
||||
__inline
|
||||
static void launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY );
|
||||
};
|
||||
|
||||
// Binds each of the n buffers in buffInfo to the compute stage. Read-only
// buffers take the next SRV slot (launcher->m_idx), all others the next UAV
// slot (launcher->m_idxRw); the slot counter used is incremented.
void LauncherDX11::setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n )
{
	const DeviceDX11* dddx = (const DeviceDX11*)launcher->m_deviceData;

	for(int i=0; i<n; i++)
	{
		// Only the view pointers are needed, so the element type is irrelevant.
		BufferDX11<int>* dBuf = (BufferDX11<int>*)buffInfo[i].m_buffer;

		if( !buffInfo[i].m_isReadOnly )
		{
			// todo. cannot initialize append buffer with proper counter value which is the last arg
			dddx->m_context->CSSetUnorderedAccessViews( launcher->m_idxRw++, 1, dBuf->getUAVPtr(), 0 );
			continue;
		}

		dddx->m_context->CSSetShaderResources( launcher->m_idx++, 1, dBuf->getSRVPtr() );
	}
}
|
||||
|
||||
template<typename T>
|
||||
void LauncherDX11::setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts )
|
||||
{
|
||||
KernelDX11* dxKernel = (KernelDX11*)launcher->m_kernel;
|
||||
const DeviceDX11* dddx = (const DeviceDX11*)launcher->m_deviceData;
|
||||
BufferDX11<T>* dBuf = (BufferDX11<T>*)&constBuff;
|
||||
/*
|
||||
D3D11_MAPPED_SUBRESOURCE MappedResource;
|
||||
dddx->m_context->Map( dBuf->getBuffer(), 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
|
||||
memcpy( MappedResource.pData, &consts, sizeof(T) );
|
||||
dddx->m_context->Unmap( dBuf->getBuffer(), 0 );
|
||||
*/
|
||||
|
||||
dddx->m_context->UpdateSubresource( dBuf->getBuffer(), 0, NULL, &consts, 0, 0 );
|
||||
|
||||
dddx->m_context->CSSetConstantBuffers( 0, 1, dBuf->getBufferPtr() );
|
||||
}
|
||||
|
||||
// Binds the launcher's compute shader and dispatches enough thread groups of
// size localSizeX x localSizeY to cover numThreadsX x numThreadsY threads
// (at least one group per axis). Afterwards the shader is unbound and the
// UAV/SRV slots used through the launcher are reset to NULL.
void LauncherDX11::launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY )
{
	KernelDX11* dxKernel = (KernelDX11*)launcher->m_kernel;
	const DeviceDX11* dddx = (const DeviceDX11*)launcher->m_deviceData;

	dddx->m_context->CSSetShader( dxKernel->getKernel(), NULL, 0 );

	// Group counts: thread count divided by group size, rounded up, minimum 1.
	int groupsX = (numThreadsX/localSizeX) + ( (numThreadsX%localSizeX) ? 1 : 0 );
	int groupsY = (numThreadsY/localSizeY) + ( (numThreadsY%localSizeY) ? 1 : 0 );
	if( groupsX < 1 ) groupsX = 1;
	if( groupsY < 1 ) groupsY = 1;
	const int groupsZ = 1;

	dddx->m_context->Dispatch( groupsX, groupsY, groupsZ );

	// set 0 to registers
	dddx->m_context->CSSetShader( NULL, NULL, 0 );

	if( launcher->m_idxRw )
	{
		ID3D11UnorderedAccessView* aUAViewsNULL[ 16 ] = { 0 };
		dddx->m_context->CSSetUnorderedAccessViews( 0,
			min( (unsigned int)launcher->m_idxRw, sizeof(aUAViewsNULL)/sizeof(*aUAViewsNULL) ), aUAViewsNULL, NULL );
	}

	if( launcher->m_idx )
	{
		ID3D11ShaderResourceView* ppSRVNULL[16] = { 0 };
		dddx->m_context->CSSetShaderResources( 0,
			min( (unsigned int)launcher->m_idx, sizeof(ppSRVNULL)/sizeof(*ppSRVNULL) ), ppSRVNULL );
	}
}
|
||||
|
||||
#undef SAFE_RELEASE
|
||||
|
||||
};
|
||||
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
struct StopwatchDX11 : public StopwatchBase
|
||||
{
|
||||
public:
|
||||
__inline
|
||||
StopwatchDX11() : StopwatchBase(){}
|
||||
__inline
|
||||
~StopwatchDX11();
|
||||
|
||||
__inline
|
||||
void init( const Device* deviceData );
|
||||
__inline
|
||||
void start();
|
||||
__inline
|
||||
void split();
|
||||
__inline
|
||||
void stop();
|
||||
__inline
|
||||
float getMs(int index=0);
|
||||
__inline
|
||||
void getMs( float* times, int capacity );
|
||||
|
||||
public:
|
||||
ID3D11Query* m_tQuery[CAPACITY+1];
|
||||
ID3D11Query* m_fQuery;
|
||||
UINT64 m_t[CAPACITY];
|
||||
};
|
||||
|
||||
void StopwatchDX11::init( const Device* deviceData )
|
||||
{
|
||||
ADLASSERT( deviceData->m_type == TYPE_DX11 );
|
||||
m_device = deviceData;
|
||||
{
|
||||
D3D11_QUERY_DESC qDesc;
|
||||
qDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
|
||||
qDesc.MiscFlags = 0;
|
||||
((const DeviceDX11*)m_device)->m_device->CreateQuery( &qDesc, &m_fQuery );
|
||||
}
|
||||
for(int i=0; i<CAPACITY+1; i++)
|
||||
{
|
||||
D3D11_QUERY_DESC qDesc;
|
||||
qDesc.Query = D3D11_QUERY_TIMESTAMP;
|
||||
qDesc.MiscFlags = 0;
|
||||
((const DeviceDX11*)m_device)->m_device->CreateQuery( &qDesc, &m_tQuery[i] );
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the disjoint query and every timestamp query. Pointers that are
// NULL are skipped, and released pointers are cleared.
StopwatchDX11::~StopwatchDX11()
{
	if( m_fQuery )
	{
		m_fQuery->Release();
		m_fQuery = NULL;
	}
	for(int i=0; i<CAPACITY+1; i++)
	{
		if( m_tQuery[i] )
		{
			m_tQuery[i]->Release();
			m_tQuery[i] = NULL;
		}
	}
}
|
||||
|
||||
void StopwatchDX11::start()
|
||||
{
|
||||
m_idx = 0;
|
||||
((const DeviceDX11*)m_device)->m_context->Begin( m_fQuery );
|
||||
((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] );
|
||||
}
|
||||
|
||||
void StopwatchDX11::split()
|
||||
{
|
||||
if( m_idx < CAPACITY )
|
||||
((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] );
|
||||
}
|
||||
|
||||
void StopwatchDX11::stop()
|
||||
{
|
||||
((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] );
|
||||
((const DeviceDX11*)m_device)->m_context->End( m_fQuery );
|
||||
}
|
||||
|
||||
float StopwatchDX11::getMs(int index)
|
||||
{
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT d;
|
||||
// m_deviceData->m_context->End( m_fQuery );
|
||||
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_fQuery, &d,sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT),0 ) == S_FALSE ) {}
|
||||
|
||||
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[0], &m_t[index],sizeof(UINT64),0 ) == S_FALSE ){}
|
||||
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[1], &m_t[index+1],sizeof(UINT64),0 ) == S_FALSE ){}
|
||||
|
||||
ADLASSERT( d.Disjoint == false );
|
||||
|
||||
float elapsedMs = (m_t[index+1] - m_t[index])/(float)d.Frequency*1000;
|
||||
return elapsedMs;
|
||||
|
||||
}
|
||||
|
||||
void StopwatchDX11::getMs( float* times, int capacity )
|
||||
{
|
||||
ADLASSERT( capacity <= CAPACITY );
|
||||
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT d;
|
||||
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_fQuery, &d,sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT),0 ) == S_FALSE ) {}
|
||||
|
||||
for(int i=0; i<m_idx; i++)
|
||||
{
|
||||
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[i], &m_t[i],sizeof(UINT64),0 ) == S_FALSE ){}
|
||||
}
|
||||
|
||||
ADLASSERT( d.Disjoint == false );
|
||||
|
||||
for(int i=0; i<capacity; i++)
|
||||
{
|
||||
times[i] = (m_t[i+1] - m_t[i])/(float)d.Frequency*1000;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
struct DeviceHost : public Device
|
||||
{
|
||||
DeviceHost() : Device( TYPE_HOST ){}
|
||||
|
||||
__inline
|
||||
void initialize(const Config& cfg);
|
||||
__inline
|
||||
void release();
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void deallocate(Buffer<T>* buf);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const Buffer<T>* src, int nElems);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(T* dst, const Buffer<T>* src, int nElems, int offsetNElems = 0);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const T* src, int nElems, int offsetNElems = 0);
|
||||
|
||||
__inline
|
||||
void waitForCompletion() const;
|
||||
};
|
||||
|
||||
// The host device needs no setup; cfg is ignored.
void DeviceHost::initialize(const Config& cfg)
{
}
|
||||
|
||||
// The host device holds no resources of its own; nothing to release.
void DeviceHost::release()
{
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceHost::allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type)
|
||||
{
|
||||
buf->m_device = this;
|
||||
|
||||
if( type == BufferBase::BUFFER_CONST ) return;
|
||||
|
||||
buf->m_ptr = new T[nElems];
|
||||
ADLASSERT( buf->m_ptr );
|
||||
buf->m_size = nElems;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceHost::deallocate(Buffer<T>* buf)
|
||||
{
|
||||
if( buf->m_ptr ) delete [] buf->m_ptr;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceHost::copy(Buffer<T>* dst, const Buffer<T>* src, int nElems)
|
||||
{
|
||||
copy( dst, src->m_ptr, nElems );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceHost::copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems)
|
||||
{
|
||||
ADLASSERT( src->getType() == TYPE_HOST );
|
||||
memcpy( dst, src->m_ptr+srcOffsetNElems, nElems*sizeof(T) );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceHost::copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems)
|
||||
{
|
||||
ADLASSERT( dst->getType() == TYPE_HOST );
|
||||
memcpy( dst->m_ptr+dstOffsetNElems, src, nElems*sizeof(T) );
|
||||
}
|
||||
|
||||
void DeviceHost::waitForCompletion() const
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
class StopwatchHost : public StopwatchBase
|
||||
{
|
||||
public:
|
||||
__inline
|
||||
StopwatchHost();
|
||||
__inline
|
||||
void init( const Device* deviceData );
|
||||
__inline
|
||||
void start();
|
||||
__inline
|
||||
void split();
|
||||
__inline
|
||||
void stop();
|
||||
__inline
|
||||
float getMs(int index=0);
|
||||
__inline
|
||||
void getMs( float* times, int capacity );
|
||||
|
||||
private:
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER m_frequency;
|
||||
LARGE_INTEGER m_t[CAPACITY];
|
||||
#else
|
||||
struct timeval mStartTime;
|
||||
timeval m_t[CAPACITY];
|
||||
#endif
|
||||
};
|
||||
|
||||
__inline
|
||||
StopwatchHost::StopwatchHost()
|
||||
: StopwatchBase()
|
||||
{
|
||||
}
|
||||
|
||||
__inline
|
||||
void StopwatchHost::init( const Device* deviceData )
|
||||
{
|
||||
m_device = deviceData;
|
||||
#ifdef _WIN32
|
||||
QueryPerformanceFrequency( &m_frequency );
|
||||
#else
|
||||
gettimeofday(&mStartTime, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline
|
||||
void StopwatchHost::start()
|
||||
{
|
||||
m_idx = 0;
|
||||
#ifdef _WIN32
|
||||
QueryPerformanceCounter(&m_t[m_idx++]);
|
||||
#else
|
||||
gettimeofday(&m_t[m_idx++], 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline
|
||||
void StopwatchHost::split()
|
||||
{
|
||||
#ifdef _WIN32
|
||||
QueryPerformanceCounter(&m_t[m_idx++]);
|
||||
#else
|
||||
gettimeofday(&m_t[m_idx++], 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline
|
||||
void StopwatchHost::stop()
|
||||
{
|
||||
split();
|
||||
}
|
||||
|
||||
__inline
|
||||
float StopwatchHost::getMs(int index)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return (float)(1000*(m_t[index+1].QuadPart - m_t[index].QuadPart))/m_frequency.QuadPart;
|
||||
#else
|
||||
return (m_t[index+1].tv_sec - m_t[index].tv_sec) * 1000 +
|
||||
(m_t[index+1].tv_usec - m_t[index].tv_usec) / 1000;
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline
|
||||
void StopwatchHost::getMs(float* times, int capacity)
|
||||
{
|
||||
for(int i=0; i<capacity; i++) times[i] = 0.f;
|
||||
|
||||
for(int i=0; i<min(capacity, m_idx-1); i++)
|
||||
{
|
||||
times[i] = getMs(i);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <Adl/Adl.h>
|
||||
#include <AdlPrimitives/Math/Math.h>
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
// Device-independent part of the Copy primitive: holds the option enum that
// selects which float4 copy kernel variant Copy<TYPE>::execute launches.
class CopyBase
{
	public:
		enum Option
		{
			PER_WI_1,	// launched with n/1
			PER_WI_2,	// launched with n/2
			PER_WI_4	// launched with n/4
		};
};
|
||||
|
||||
template<DeviceType TYPE>
|
||||
class Copy : public CopyBase
|
||||
{
|
||||
public:
|
||||
typedef Launcher::BufferInfo BufferInfo;
|
||||
|
||||
struct Data
|
||||
{
|
||||
const Device* m_device;
|
||||
Kernel* m_copy1F4Kernel;
|
||||
Kernel* m_copy2F4Kernel;
|
||||
Kernel* m_copy4F4Kernel;
|
||||
Kernel* m_copyF1Kernel;
|
||||
Kernel* m_copyF2Kernel;
|
||||
Buffer<int4>* m_constBuffer;
|
||||
};
|
||||
|
||||
static
|
||||
Data* allocate(const Device* deviceData);
|
||||
|
||||
static
|
||||
void deallocate(Data* data);
|
||||
|
||||
static
|
||||
void execute( Data* data, Buffer<float4>& dst, Buffer<float4>& src, int n, Option option = PER_WI_1);
|
||||
|
||||
static
|
||||
void execute( Data* data, Buffer<float2>& dst, Buffer<float2>& src, int n);
|
||||
|
||||
static
|
||||
void execute( Data* data, Buffer<float>& dst, Buffer<float>& src, int n);
|
||||
};
|
||||
|
||||
|
||||
#include <AdlPrimitives/Copy/CopyHost.inl>
|
||||
#include <AdlPrimitives/Copy/Copy.inl>
|
||||
|
||||
};
|
||||
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
#define PATH "..\\..\\opencl\\primitives\\AdlPrimitives\\Copy\\CopyKernels"
|
||||
#define KERNEL0 "Copy1F4Kernel"
|
||||
#define KERNEL1 "Copy2F4Kernel"
|
||||
#define KERNEL2 "Copy4F4Kernel"
|
||||
#define KERNEL3 "CopyF1Kernel"
|
||||
#define KERNEL4 "CopyF2Kernel"
|
||||
|
||||
#include <AdlPrimitives/Copy/CopyKernelsCL.h>
|
||||
#include <AdlPrimitives/Copy/CopyKernelsDX11.h>
|
||||
|
||||
|
||||
template<DeviceType TYPE>
|
||||
typename Copy<TYPE>::Data* Copy<TYPE>::allocate( const Device* device )
|
||||
{
|
||||
ADLASSERT( TYPE == device->m_type );
|
||||
|
||||
|
||||
const char* src[] =
|
||||
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
|
||||
{copyKernelsCL, copyKernelsDX11};
|
||||
// ADLASSERT(0);
|
||||
#else
|
||||
{0,0};
|
||||
#endif
|
||||
|
||||
Data* data = new Data;
|
||||
data->m_device = device;
|
||||
data->m_copy1F4Kernel = device->getKernel( PATH, KERNEL0, 0, src[TYPE] );
|
||||
data->m_copy2F4Kernel = device->getKernel( PATH, KERNEL1, 0, src[TYPE] );
|
||||
data->m_copy4F4Kernel = device->getKernel( PATH, KERNEL2, 0, src[TYPE] );
|
||||
data->m_copyF1Kernel = device->getKernel( PATH, KERNEL3, 0, src[TYPE] );
|
||||
data->m_copyF2Kernel = device->getKernel( PATH, KERNEL4, 0, src[TYPE] );
|
||||
data->m_constBuffer = new Buffer<int4>( device, 1, BufferBase::BUFFER_CONST );
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Copy<TYPE>::deallocate( Data* data )
|
||||
{
|
||||
delete data->m_constBuffer;
|
||||
delete data;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Copy<TYPE>::execute( Data* data, Buffer<float4>& dst, Buffer<float4>& src, int n, Option option )
|
||||
{
|
||||
ADLASSERT( TYPE == dst.getType() );
|
||||
ADLASSERT( TYPE == src.getType() );
|
||||
|
||||
int4 constBuffer;
|
||||
constBuffer.x = n;
|
||||
|
||||
switch (option)
|
||||
{
|
||||
case PER_WI_1:
|
||||
{
|
||||
BufferInfo bInfo[] = { BufferInfo( &dst ), BufferInfo( &src, true ) };
|
||||
|
||||
Launcher launcher( data->m_device, data->m_copy1F4Kernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( *data->m_constBuffer, constBuffer );
|
||||
launcher.launch1D( n/1 );
|
||||
}
|
||||
break;
|
||||
case PER_WI_2:
|
||||
{
|
||||
ADLASSERT( n%2 == 0 );
|
||||
BufferInfo bInfo[] = { BufferInfo( &dst ), BufferInfo( &src, true ) };
|
||||
|
||||
Launcher launcher( data->m_device, data->m_copy2F4Kernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( *data->m_constBuffer, constBuffer );
|
||||
launcher.launch1D( n/2 );
|
||||
}
|
||||
break;
|
||||
case PER_WI_4:
|
||||
{
|
||||
ADLASSERT( n%4 == 0 );
|
||||
BufferInfo bInfo[] = { BufferInfo( &dst ), BufferInfo( &src, true ) };
|
||||
|
||||
Launcher launcher( data->m_device, data->m_copy4F4Kernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( *data->m_constBuffer, constBuffer );
|
||||
launcher.launch1D( n/4 );
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ADLASSERT(0);
|
||||
break;
|
||||
};
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Copy<TYPE>::execute( Data* data, Buffer<float2>& dst, Buffer<float2>& src, int n )
|
||||
{
|
||||
ADLASSERT( TYPE == dst.getType() );
|
||||
ADLASSERT( TYPE == src.getType() );
|
||||
|
||||
int4 constBuffer;
|
||||
constBuffer.x = n;
|
||||
|
||||
BufferInfo bInfo[] = { BufferInfo( &dst ), BufferInfo( &src, true ) };
|
||||
|
||||
Launcher launcher( data->m_device, data->m_copyF2Kernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( *data->m_constBuffer, constBuffer );
|
||||
launcher.launch1D( n/1 );
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Copy<TYPE>::execute( Data* data, Buffer<float>& dst, Buffer<float>& src, int n )
|
||||
{
|
||||
ADLASSERT( TYPE == dst.getType() );
|
||||
ADLASSERT( TYPE == src.getType() );
|
||||
|
||||
int4 constBuffer;
|
||||
constBuffer.x = n;
|
||||
|
||||
BufferInfo bInfo[] = { BufferInfo( &dst ), BufferInfo( &src, true ) };
|
||||
|
||||
Launcher launcher( data->m_device, data->m_copyF1Kernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( *data->m_constBuffer, constBuffer );
|
||||
launcher.launch1D( n/1 );
|
||||
}
|
||||
|
||||
|
||||
#undef PATH
|
||||
#undef KERNEL0
|
||||
#undef KERNEL1
|
||||
#undef KERNEL2
|
||||
#undef KERNEL3
|
||||
#undef KERNEL4
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user