fix: some files did not have the svn:eol-style native property set yet
This commit is contained in:
@@ -1,207 +1,207 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "LinearMath/btAlignedAllocator.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
|
||||
|
||||
#include "btCudaBroadphase.h"
|
||||
#include "radixsort.cuh"
|
||||
|
||||
|
||||
|
||||
#define BT_GPU_PREF(func) btCuda_##func
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
|
||||
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h"
|
||||
#undef BT_GPU_PREF
|
||||
|
||||
extern "C" void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams);
|
||||
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
|
||||
|
||||
// Constructs the CUDA broadphase. Every argument is forwarded unchanged to
// the btGpu3DGridBroadphase base constructor; afterwards _initialize()
// allocates the device-side buffers.
btCudaBroadphase::btCudaBroadphase(
	btOverlappingPairCache* overlappingPairCache,
	const btVector3& worldAabbMin,
	const btVector3& worldAabbMax,
	int gridSizeX,
	int gridSizeY,
	int gridSizeZ,
	int maxSmallProxies,
	int maxLargeProxies,
	int maxPairsPerSmallProxy,
	int maxSmallProxiesPerCell)
	: btGpu3DGridBroadphase(
		overlappingPairCache,
		worldAabbMin, worldAabbMax,
		gridSizeX, gridSizeY, gridSizeZ,
		maxSmallProxies, maxLargeProxies,
		maxPairsPerSmallProxy, maxSmallProxiesPerCell)
{
	_initialize();
}
|
||||
|
||||
|
||||
|
||||
// Destructor: releases the device buffers through _finalize().
btCudaBroadphase::~btCudaBroadphase()
{
	// The overlapping pair cache is not freed here. Per the original note,
	// btSimpleBroadphase frees the btSortedOverlappingPairCache when
	// m_ownsPairCache is set.
	assert(m_bInitialized);
	_finalize();
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::_initialize()
|
||||
{
|
||||
// allocate GPU data
|
||||
btCuda_allocateArray((void**)&m_dBodiesHash[0], m_maxHandles * 2 * sizeof(unsigned int));
|
||||
btCuda_allocateArray((void**)&m_dBodiesHash[1], m_maxHandles * 2 * sizeof(unsigned int));
|
||||
|
||||
btCuda_allocateArray((void**)&m_dCellStart, m_params.m_numCells * sizeof(unsigned int));
|
||||
|
||||
btCuda_allocateArray((void**)&m_dPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
|
||||
btCuda_copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed?
|
||||
|
||||
btCuda_allocateArray((void**)&m_dPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
|
||||
btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
|
||||
|
||||
unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
|
||||
btCuda_allocateArray((void**)&m_dAABB, numAABB * sizeof(bt3DGrid3F1U) * 2);
|
||||
|
||||
btCuda_allocateArray((void**)&m_dPairScan, (m_maxHandles + 1) * sizeof(unsigned int));
|
||||
|
||||
btCuda_allocateArray((void**)&m_dPairOut, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::_finalize()
|
||||
{
|
||||
assert(m_bInitialized);
|
||||
btCuda_freeArray(m_dBodiesHash[0]);
|
||||
btCuda_freeArray(m_dBodiesHash[1]);
|
||||
btCuda_freeArray(m_dCellStart);
|
||||
btCuda_freeArray(m_dPairBuffStartCurr);
|
||||
btCuda_freeArray(m_dAABB);
|
||||
btCuda_freeArray(m_dPairBuff);
|
||||
btCuda_freeArray(m_dPairScan);
|
||||
btCuda_freeArray(m_dPairOut);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// overrides for CUDA version
|
||||
//
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::prepareAABB()
|
||||
{
|
||||
btGpu3DGridBroadphase::prepareAABB();
|
||||
btCuda_copyArrayToDevice(m_dAABB, m_hAABB, sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// CUDA override: passes the host-side parameter block on to
// btCuda_setParameters().
void btCudaBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
{
	btCuda_setParameters(hostParams);
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::calcHashAABB()
|
||||
{
|
||||
BT_PROFILE("btCuda_calcHashAABB");
|
||||
btCuda_calcHashAABB(m_dAABB, m_dBodiesHash[0], m_numHandles);
|
||||
// btCuda_copyArrayFromDevice((void*)m_hBodiesHash, (void*)m_dBodiesHash[0], sizeof(unsigned int) * 2 * m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::sortHash()
|
||||
{
|
||||
BT_PROFILE("RadixSort-- CUDA");
|
||||
RadixSort((KeyValuePair*)m_dBodiesHash[0], (KeyValuePair*)m_dBodiesHash[1], m_numHandles, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::findCellStart()
|
||||
{
|
||||
BT_PROFILE("btCuda_findCellStart");
|
||||
btCuda_findCellStart(m_dBodiesHash[0], m_dCellStart, m_numHandles, m_params.m_numCells);
|
||||
// btCuda_copyArrayFromDevice((void*)m_hBodiesHash, (void*)m_dBodiesHash[0], sizeof(unsigned int) * 2 * m_numHandles);
|
||||
// btCuda_copyArrayFromDevice((void*)m_hCellStart, (void*)m_dCellStart, sizeof(unsigned int) * m_params.m_numCells);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::findOverlappingPairs()
|
||||
{
|
||||
BT_PROFILE("btCuda_findOverlappingPairs");
|
||||
btCuda_findOverlappingPairs(m_dAABB, m_dBodiesHash[0], m_dCellStart, m_dPairBuff, m_dPairBuffStartCurr, m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::findPairsLarge()
|
||||
{
|
||||
BT_PROFILE("btCuda_findPairsLarge");
|
||||
btCuda_findPairsLarge(m_dAABB, m_dBodiesHash[0], m_dCellStart, m_dPairBuff, m_dPairBuffStartCurr, m_numHandles, m_numLargeHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::computePairCacheChanges()
|
||||
{
|
||||
BT_PROFILE("btCuda_computePairCacheChanges");
|
||||
btCuda_computePairCacheChanges(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dAABB, m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::scanOverlappingPairBuff()
|
||||
{
|
||||
btCuda_copyArrayFromDevice(m_hPairScan, m_dPairScan, sizeof(unsigned int)*(m_numHandles + 1));
|
||||
btGpu3DGridBroadphase::scanOverlappingPairBuff();
|
||||
btCuda_copyArrayToDevice(m_dPairScan, m_hPairScan, sizeof(unsigned int)*(m_numHandles + 1));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::squeezeOverlappingPairBuff()
|
||||
{
|
||||
BT_PROFILE("btCuda_squeezeOverlappingPairBuff");
|
||||
btCuda_squeezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dPairOut, m_dAABB, m_numHandles);
|
||||
btCuda_copyArrayFromDevice(m_hPairOut, m_dPairOut, sizeof(unsigned int) * m_hPairScan[m_numHandles]);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// CUDA override: runs the base-class resetPool(), then uploads the host
// start/current table so the device copy matches it.
void btCudaBroadphase::resetPool(btDispatcher* dispatcher)
{
	btGpu3DGridBroadphase::resetPool(dispatcher);

	// same size as the m_dPairBuffStartCurr allocation in _initialize()
	const size_t startCurrBytes = (m_maxHandles * 2 + 1) * sizeof(unsigned int);
	btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, startCurrBytes);
}
|
||||
|
||||
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "LinearMath/btAlignedAllocator.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
|
||||
|
||||
#include "btCudaBroadphase.h"
|
||||
#include "radixsort.cuh"
|
||||
|
||||
|
||||
|
||||
#define BT_GPU_PREF(func) btCuda_##func
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
|
||||
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h"
|
||||
#undef BT_GPU_PREF
|
||||
|
||||
extern "C" void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams);
|
||||
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
|
||||
|
||||
// Constructs the CUDA broadphase. Every argument is forwarded unchanged to
// the btGpu3DGridBroadphase base constructor; afterwards _initialize()
// allocates the device-side buffers.
btCudaBroadphase::btCudaBroadphase(
	btOverlappingPairCache* overlappingPairCache,
	const btVector3& worldAabbMin,
	const btVector3& worldAabbMax,
	int gridSizeX,
	int gridSizeY,
	int gridSizeZ,
	int maxSmallProxies,
	int maxLargeProxies,
	int maxPairsPerSmallProxy,
	int maxSmallProxiesPerCell)
	: btGpu3DGridBroadphase(
		overlappingPairCache,
		worldAabbMin, worldAabbMax,
		gridSizeX, gridSizeY, gridSizeZ,
		maxSmallProxies, maxLargeProxies,
		maxPairsPerSmallProxy, maxSmallProxiesPerCell)
{
	_initialize();
}
|
||||
|
||||
|
||||
|
||||
// Destructor: releases the device buffers through _finalize().
btCudaBroadphase::~btCudaBroadphase()
{
	// The overlapping pair cache is not freed here. Per the original note,
	// btSimpleBroadphase frees the btSortedOverlappingPairCache when
	// m_ownsPairCache is set.
	assert(m_bInitialized);
	_finalize();
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::_initialize()
|
||||
{
|
||||
// allocate GPU data
|
||||
btCuda_allocateArray((void**)&m_dBodiesHash[0], m_maxHandles * 2 * sizeof(unsigned int));
|
||||
btCuda_allocateArray((void**)&m_dBodiesHash[1], m_maxHandles * 2 * sizeof(unsigned int));
|
||||
|
||||
btCuda_allocateArray((void**)&m_dCellStart, m_params.m_numCells * sizeof(unsigned int));
|
||||
|
||||
btCuda_allocateArray((void**)&m_dPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
|
||||
btCuda_copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed?
|
||||
|
||||
btCuda_allocateArray((void**)&m_dPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
|
||||
btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
|
||||
|
||||
unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
|
||||
btCuda_allocateArray((void**)&m_dAABB, numAABB * sizeof(bt3DGrid3F1U) * 2);
|
||||
|
||||
btCuda_allocateArray((void**)&m_dPairScan, (m_maxHandles + 1) * sizeof(unsigned int));
|
||||
|
||||
btCuda_allocateArray((void**)&m_dPairOut, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::_finalize()
|
||||
{
|
||||
assert(m_bInitialized);
|
||||
btCuda_freeArray(m_dBodiesHash[0]);
|
||||
btCuda_freeArray(m_dBodiesHash[1]);
|
||||
btCuda_freeArray(m_dCellStart);
|
||||
btCuda_freeArray(m_dPairBuffStartCurr);
|
||||
btCuda_freeArray(m_dAABB);
|
||||
btCuda_freeArray(m_dPairBuff);
|
||||
btCuda_freeArray(m_dPairScan);
|
||||
btCuda_freeArray(m_dPairOut);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// overrides for CUDA version
|
||||
//
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::prepareAABB()
|
||||
{
|
||||
btGpu3DGridBroadphase::prepareAABB();
|
||||
btCuda_copyArrayToDevice(m_dAABB, m_hAABB, sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// CUDA override: passes the host-side parameter block on to
// btCuda_setParameters().
void btCudaBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
{
	btCuda_setParameters(hostParams);
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::calcHashAABB()
|
||||
{
|
||||
BT_PROFILE("btCuda_calcHashAABB");
|
||||
btCuda_calcHashAABB(m_dAABB, m_dBodiesHash[0], m_numHandles);
|
||||
// btCuda_copyArrayFromDevice((void*)m_hBodiesHash, (void*)m_dBodiesHash[0], sizeof(unsigned int) * 2 * m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::sortHash()
|
||||
{
|
||||
BT_PROFILE("RadixSort-- CUDA");
|
||||
RadixSort((KeyValuePair*)m_dBodiesHash[0], (KeyValuePair*)m_dBodiesHash[1], m_numHandles, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::findCellStart()
|
||||
{
|
||||
BT_PROFILE("btCuda_findCellStart");
|
||||
btCuda_findCellStart(m_dBodiesHash[0], m_dCellStart, m_numHandles, m_params.m_numCells);
|
||||
// btCuda_copyArrayFromDevice((void*)m_hBodiesHash, (void*)m_dBodiesHash[0], sizeof(unsigned int) * 2 * m_numHandles);
|
||||
// btCuda_copyArrayFromDevice((void*)m_hCellStart, (void*)m_dCellStart, sizeof(unsigned int) * m_params.m_numCells);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::findOverlappingPairs()
|
||||
{
|
||||
BT_PROFILE("btCuda_findOverlappingPairs");
|
||||
btCuda_findOverlappingPairs(m_dAABB, m_dBodiesHash[0], m_dCellStart, m_dPairBuff, m_dPairBuffStartCurr, m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::findPairsLarge()
|
||||
{
|
||||
BT_PROFILE("btCuda_findPairsLarge");
|
||||
btCuda_findPairsLarge(m_dAABB, m_dBodiesHash[0], m_dCellStart, m_dPairBuff, m_dPairBuffStartCurr, m_numHandles, m_numLargeHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::computePairCacheChanges()
|
||||
{
|
||||
BT_PROFILE("btCuda_computePairCacheChanges");
|
||||
btCuda_computePairCacheChanges(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dAABB, m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::scanOverlappingPairBuff()
|
||||
{
|
||||
btCuda_copyArrayFromDevice(m_hPairScan, m_dPairScan, sizeof(unsigned int)*(m_numHandles + 1));
|
||||
btGpu3DGridBroadphase::scanOverlappingPairBuff();
|
||||
btCuda_copyArrayToDevice(m_dPairScan, m_hPairScan, sizeof(unsigned int)*(m_numHandles + 1));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::squeezeOverlappingPairBuff()
|
||||
{
|
||||
BT_PROFILE("btCuda_squeezeOverlappingPairBuff");
|
||||
btCuda_squeezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dPairOut, m_dAABB, m_numHandles);
|
||||
btCuda_copyArrayFromDevice(m_hPairOut, m_dPairOut, sizeof(unsigned int) * m_hPairScan[m_numHandles]);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// CUDA override: runs the base-class resetPool(), then uploads the host
// start/current table so the device copy matches it.
void btCudaBroadphase::resetPool(btDispatcher* dispatcher)
{
	btGpu3DGridBroadphase::resetPool(dispatcher);

	// same size as the m_dPairBuffStartCurr allocation in _initialize()
	const size_t startCurrBytes = (m_maxHandles * 2 + 1) * sizeof(unsigned int);
	btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, startCurrBytes);
}
|
||||
|
||||
|
||||
|
||||
@@ -1,74 +1,74 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string.h>
|
||||
|
||||
#include "cutil_math.h"
|
||||
#include "math_constants.h"
|
||||
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
|
||||
|
||||
#include "btCudaDefines.h"
|
||||
|
||||
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
|
||||
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h"
|
||||
|
||||
|
||||
|
||||
// Returns the bits of a float4 value reinterpreted as a bt3DGrid3F1U.
// The argument is taken by value, so the cast reads the local copy.
__device__ inline bt3DGrid3F1U tex_fetch3F1U(float4 a)
{
	bt3DGrid3F1U* asGridEntry = (bt3DGrid3F1U*)(&a);
	return *asGridEntry;
}
|
||||
|
||||
|
||||
|
||||
void btCuda_exit(int val);
|
||||
|
||||
|
||||
|
||||
texture<uint2, 1, cudaReadModeElementType> particleHashTex;
|
||||
texture<uint, 1, cudaReadModeElementType> cellStartTex;
|
||||
texture<float4, 1, cudaReadModeElementType> pAABBTex;
|
||||
|
||||
|
||||
|
||||
__constant__ bt3DGridBroadphaseParams params;
|
||||
|
||||
|
||||
|
||||
// Copies the host parameter block into the __constant__ symbol `params`.
// Any CUDA error is handled by BT_GPU_SAFE_CALL.
extern "C" void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams)
{
	// copy parameters to constant memory
	BT_GPU_SAFE_CALL(cudaMemcpyToSymbol(params, hostParams, sizeof(bt3DGridBroadphaseParams)));
}
|
||||
|
||||
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h"
|
||||
|
||||
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string.h>
|
||||
|
||||
#include "cutil_math.h"
|
||||
#include "math_constants.h"
|
||||
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
|
||||
|
||||
#include "btCudaDefines.h"
|
||||
|
||||
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
|
||||
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h"
|
||||
|
||||
|
||||
|
||||
// Returns the bits of a float4 value reinterpreted as a bt3DGrid3F1U.
// The argument is taken by value, so the cast reads the local copy.
__device__ inline bt3DGrid3F1U tex_fetch3F1U(float4 a)
{
	bt3DGrid3F1U* asGridEntry = (bt3DGrid3F1U*)(&a);
	return *asGridEntry;
}
|
||||
|
||||
|
||||
|
||||
void btCuda_exit(int val);
|
||||
|
||||
|
||||
|
||||
texture<uint2, 1, cudaReadModeElementType> particleHashTex;
|
||||
texture<uint, 1, cudaReadModeElementType> cellStartTex;
|
||||
texture<float4, 1, cudaReadModeElementType> pAABBTex;
|
||||
|
||||
|
||||
|
||||
__constant__ bt3DGridBroadphaseParams params;
|
||||
|
||||
|
||||
|
||||
// Copies the host parameter block into the __constant__ symbol `params`.
// Any CUDA error is handled by BT_GPU_SAFE_CALL.
extern "C" void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams)
{
	// copy parameters to constant memory
	BT_GPU_SAFE_CALL(cudaMemcpyToSymbol(params, hostParams, sizeof(bt3DGridBroadphaseParams)));
}
|
||||
|
||||
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h"
|
||||
|
||||
|
||||
|
||||
@@ -1,69 +1,69 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef CUDA_BROADPHASE_H
|
||||
#define CUDA_BROADPHASE_H
|
||||
|
||||
|
||||
|
||||
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h"
|
||||
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphase.h"
|
||||
|
||||
|
||||
|
||||
///The btCudaBroadphase uses CUDA-capable GPU to compute overlapping pairs
|
||||
|
||||
class btCudaBroadphase : public btGpu3DGridBroadphase
|
||||
{
|
||||
protected:
|
||||
// GPU data
|
||||
unsigned int* m_dBodiesHash[2];
|
||||
unsigned int* m_dCellStart;
|
||||
unsigned int* m_dPairBuff;
|
||||
unsigned int* m_dPairBuffStartCurr;
|
||||
bt3DGrid3F1U* m_dAABB;
|
||||
unsigned int* m_dPairScan;
|
||||
unsigned int* m_dPairOut;
|
||||
public:
|
||||
btCudaBroadphase( btOverlappingPairCache* overlappingPairCache,
|
||||
const btVector3& worldAabbMin,const btVector3& worldAabbMax,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxies,
|
||||
int maxSmallProxiesPerCell = 8);
|
||||
virtual ~btCudaBroadphase();
|
||||
protected:
|
||||
void _initialize();
|
||||
void _finalize();
|
||||
void allocateArray(void** devPtr, unsigned int size);
|
||||
void freeArray(void* devPtr);
|
||||
// overrides for CUDA version
|
||||
virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
|
||||
virtual void prepareAABB();
|
||||
virtual void calcHashAABB();
|
||||
virtual void sortHash();
|
||||
virtual void findCellStart();
|
||||
virtual void findOverlappingPairs();
|
||||
virtual void findPairsLarge();
|
||||
virtual void computePairCacheChanges();
|
||||
virtual void scanOverlappingPairBuff();
|
||||
virtual void squeezeOverlappingPairBuff();
|
||||
virtual void resetPool(btDispatcher* dispatcher);
|
||||
};
|
||||
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef CUDA_BROADPHASE_H
|
||||
#define CUDA_BROADPHASE_H
|
||||
|
||||
|
||||
|
||||
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h"
|
||||
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphase.h"
|
||||
|
||||
|
||||
|
||||
///The btCudaBroadphase uses CUDA-capable GPU to compute overlapping pairs
|
||||
|
||||
class btCudaBroadphase : public btGpu3DGridBroadphase
|
||||
{
|
||||
protected:
|
||||
// GPU data
|
||||
unsigned int* m_dBodiesHash[2];
|
||||
unsigned int* m_dCellStart;
|
||||
unsigned int* m_dPairBuff;
|
||||
unsigned int* m_dPairBuffStartCurr;
|
||||
bt3DGrid3F1U* m_dAABB;
|
||||
unsigned int* m_dPairScan;
|
||||
unsigned int* m_dPairOut;
|
||||
public:
|
||||
btCudaBroadphase( btOverlappingPairCache* overlappingPairCache,
|
||||
const btVector3& worldAabbMin,const btVector3& worldAabbMax,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxies,
|
||||
int maxSmallProxiesPerCell = 8);
|
||||
virtual ~btCudaBroadphase();
|
||||
protected:
|
||||
void _initialize();
|
||||
void _finalize();
|
||||
void allocateArray(void** devPtr, unsigned int size);
|
||||
void freeArray(void* devPtr);
|
||||
// overrides for CUDA version
|
||||
virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
|
||||
virtual void prepareAABB();
|
||||
virtual void calcHashAABB();
|
||||
virtual void sortHash();
|
||||
virtual void findCellStart();
|
||||
virtual void findOverlappingPairs();
|
||||
virtual void findPairsLarge();
|
||||
virtual void computePairCacheChanges();
|
||||
virtual void scanOverlappingPairBuff();
|
||||
virtual void squeezeOverlappingPairBuff();
|
||||
virtual void resetPool(btDispatcher* dispatcher);
|
||||
};
|
||||
|
||||
#endif //CUDA_BROADPHASE_H
|
||||
@@ -1,138 +1,138 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
// Common preprocessor definitions for CUDA compiler
|
||||
|
||||
|
||||
|
||||
#ifndef BTCUDADEFINES_H
|
||||
#define BTCUDADEFINES_H
|
||||
|
||||
|
||||
|
||||
// Device-emulation builds switch the texture path off.
#if defined(__DEVICE_EMULATION__)
#	define B_CUDA_USE_TEX 0
#else
#	define B_CUDA_USE_TEX 1
#endif

// BT_GPU_FETCH / BT_GPU_FETCH4 read element i of array t, either through the
// texture reference named t##Tex or by indexing t directly.
#if B_CUDA_USE_TEX
#	define BT_GPU_FETCH(t, i) tex_fetch3F1U(tex1Dfetch(t##Tex, i))
#	define BT_GPU_FETCH4(t, i) tex1Dfetch(t##Tex, i)
#else
#	define BT_GPU_FETCH(t, i) t[i]
#	define BT_GPU_FETCH4(t, i) t[i]
#endif
|
||||
|
||||
|
||||
|
||||
// --- qualifiers -------------------------------------------------------------
#define BT_GPU___device__					__device__
#define BT_GPU___devdata__					__device__
#define BT_GPU___constant__					__constant__
#define BT_GPU___global__					__global__
#define BT_GPU___shared__					__shared__

// --- built-in variables and device intrinsics -------------------------------
#define BT_GPU_blockIdx						blockIdx
#define BT_GPU_blockDim						blockDim
#define BT_GPU_threadIdx					threadIdx
#define BT_GPU___syncthreads()				__syncthreads()
#define BT_GPU___mul24(a, b)				__mul24(a, b)

// --- math helpers -----------------------------------------------------------
#define BT_GPU_max(a, b)					max(a, b)
#define BT_GPU_min(a, b)					min(a, b)
#define BT_GPU_dot(a, b)					dot(a, b)
#define BT_GPU_dot4(a, b)					dot(a, b)
#define BT_GPU_cross(a, b)					cross(a, b)

// --- vector constructors ----------------------------------------------------
#define BT_GPU_make_uint2(x, y)				make_uint2(x, y)
#define BT_GPU_make_int3(x, y, z)			make_int3(x, y, z)
#define BT_GPU_make_float3(x, y, z)			make_float3(x, y, z)
#define BT_GPU_make_float34(x)				make_float3(x)
#define BT_GPU_make_float31(x)				make_float3(x)
#define BT_GPU_make_float42(a, b)			make_float4(a, b)
#define BT_GPU_make_float44(a, b, c, d)		make_float4(a, b, c, d)

// --- naming: constant-memory parameter symbol and function prefix -----------
#define BT_GPU_params						params
#define BT_GPU_PREF(func)					btCuda_##func

// --- runtime API calls ------------------------------------------------------
#define BT_GPU_Memset						cudaMemset
#define BT_GPU_MemcpyToSymbol(a, b, c)		cudaMemcpyToSymbol(a, b, c)
#define BT_GPU_BindTexture(a, b, c, d)		cudaBindTexture(a, b, c, d)
#define BT_GPU_UnbindTexture(a)				cudaUnbindTexture(a)

// --- kernel launch: numb blocks of numt threads; args carries its own parentheses
#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args)	kfunc<<<numb, numt>>>args
|
||||
|
||||
|
||||
|
||||
//! Check for CUDA error
//! Reads the pending error with cudaGetLastError(), then calls
//! cudaThreadSynchronize() and checks its result as well. On either failure
//! the message is printed to stderr (prefixed with errorMessage, which must
//! be usable with "%s") and btCuda_exit(EXIT_FAILURE) is called.
//! The status variable has a macro-specific name and errorMessage is
//! parenthesized so that an argument expression cannot be captured by, or
//! mis-parsed inside, the expansion.
#define BT_GPU_CHECK_ERROR(errorMessage)	\
	do	\
	{	\
		cudaError_t btGpuCheckErr_ = cudaGetLastError();	\
		if(btGpuCheckErr_ != cudaSuccess)	\
		{	\
			fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",	\
				(errorMessage), __FILE__, __LINE__, cudaGetErrorString(btGpuCheckErr_));	\
			btCuda_exit(EXIT_FAILURE);	\
		}	\
		btGpuCheckErr_ = cudaThreadSynchronize();	\
		if(btGpuCheckErr_ != cudaSuccess)	\
		{	\
			fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",	\
				(errorMessage), __FILE__, __LINE__, cudaGetErrorString(btGpuCheckErr_));	\
			btCuda_exit(EXIT_FAILURE);	\
		}	\
	}	\
	while(0)
|
||||
|
||||
|
||||
|
||||
// Evaluates `call` (an expression yielding a cudaError) exactly once. If the
// result is not cudaSuccess, prints the file, line and error string to stderr
// and calls btCuda_exit(EXIT_FAILURE). No synchronization is performed.
// The status variable has a macro-specific name: with a plain `err`, a call
// expression that mentioned a caller variable named `err` would have read the
// uninitialized local instead. `call` is parenthesized for the same reason of
// macro hygiene.
#define BT_GPU_SAFE_CALL_NO_SYNC(call)	\
	do	\
	{	\
		cudaError btGpuCallErr_ = (call);	\
		if(btGpuCallErr_ != cudaSuccess)	\
		{	\
			fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",	\
				__FILE__, __LINE__, cudaGetErrorString(btGpuCallErr_) );	\
			btCuda_exit(EXIT_FAILURE);	\
		}	\
	}	\
	while(0)
|
||||
|
||||
|
||||
|
||||
// Checked call followed by a checked synchronization: `call` goes through
// BT_GPU_SAFE_CALL_NO_SYNC, then the result of cudaThreadSynchronize() is
// tested. A failure there is printed to stderr and btCuda_exit(EXIT_FAILURE)
// is called.
#define BT_GPU_SAFE_CALL(call)	\
	do	\
	{	\
		BT_GPU_SAFE_CALL_NO_SYNC(call);	\
		cudaError btGpuSyncErr_ = cudaThreadSynchronize();	\
		if(btGpuSyncErr_ != cudaSuccess)	\
		{	\
			fprintf(stderr,"Cuda errorSync in file '%s' in line %i : %s.\n",	\
				__FILE__, __LINE__, cudaGetErrorString(btGpuSyncErr_) );	\
			btCuda_exit(EXIT_FAILURE);	\
		}	\
	} while (0)
|
||||
|
||||
|
||||
|
||||
extern "C" void btCuda_exit(int val);
|
||||
|
||||
|
||||
|
||||
#endif // BTCUDADEFINES_H
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
// Common preprocessor definitions for CUDA compiler
|
||||
|
||||
|
||||
|
||||
#ifndef BTCUDADEFINES_H
|
||||
#define BTCUDADEFINES_H
|
||||
|
||||
|
||||
|
||||
// Texture fetches are switched off when compiling with __DEVICE_EMULATION__.
#if defined(__DEVICE_EMULATION__)
#  define B_CUDA_USE_TEX 0
#else
#  define B_CUDA_USE_TEX 1
#endif

// BT_GPU_FETCH / BT_GPU_FETCH4 read element i of array t: through the texture
// reference named t##Tex when B_CUDA_USE_TEX is non-zero, by plain indexing
// otherwise.
#if B_CUDA_USE_TEX
#  define BT_GPU_FETCH(t, i) tex_fetch3F1U(tex1Dfetch(t##Tex, i))
#  define BT_GPU_FETCH4(t, i) tex1Dfetch(t##Tex, i)
#else
#  define BT_GPU_FETCH(t, i) t[i]
#  define BT_GPU_FETCH4(t, i) t[i]
#endif
|
||||
|
||||
|
||||
|
||||
// The BT_GPU_* names below are aliases that expand to the CUDA spelling of
// each qualifier, built-in variable or function.

// Function / variable qualifiers
#define BT_GPU___device__                       __device__
#define BT_GPU___devdata__                      __device__
#define BT_GPU___constant__                     __constant__
#define BT_GPU___global__                       __global__
#define BT_GPU___shared__                       __shared__

// Built-in index variables and the block barrier
#define BT_GPU_blockIdx                         blockIdx
#define BT_GPU_blockDim                         blockDim
#define BT_GPU_threadIdx                        threadIdx
#define BT_GPU___syncthreads()                  __syncthreads()

// Scalar and vector math helpers
#define BT_GPU_max(a, b)                        max(a, b)
#define BT_GPU_min(a, b)                        min(a, b)
#define BT_GPU___mul24(a, b)                    __mul24(a, b)
#define BT_GPU_dot(a, b)                        dot(a, b)
#define BT_GPU_dot4(a, b)                       dot(a, b)
#define BT_GPU_cross(a, b)                      cross(a, b)

// Vector constructors
#define BT_GPU_make_uint2(x, y)                 make_uint2(x, y)
#define BT_GPU_make_int3(x, y, z)               make_int3(x, y, z)
#define BT_GPU_make_float3(x, y, z)             make_float3(x, y, z)
#define BT_GPU_make_float34(x)                  make_float3(x)
#define BT_GPU_make_float31(x)                  make_float3(x)
#define BT_GPU_make_float42(a, b)               make_float4(a, b)
#define BT_GPU_make_float44(a, b, c, d)         make_float4(a, b, c, d)

// Naming: the parameter block identifier and the btCuda_ function prefix
#define BT_GPU_params                           params
#define BT_GPU_PREF(func)                       btCuda_##func

// Runtime API wrappers
#define BT_GPU_Memset                           cudaMemset
#define BT_GPU_MemcpyToSymbol(a, b, c)          cudaMemcpyToSymbol(a, b, c)
#define BT_GPU_BindTexture(a, b, c, d)          cudaBindTexture(a, b, c, d)
#define BT_GPU_UnbindTexture(a)                 cudaUnbindTexture(a)

// Kernel launch: numb and numt become the two <<< >>> launch arguments,
// args is the parenthesized kernel argument list.
#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) kfunc<<<numb, numt>>>args
|
||||
|
||||
|
||||
|
||||
//! Check for CUDA error
//! Reads cudaGetLastError() first and cudaThreadSynchronize() second. When
//! either one does not return cudaSuccess, the message, file, line and CUDA
//! error string are printed to stderr and btCuda_exit(EXIT_FAILURE) is called.
//! @param errorMessage text printed through a %s conversion
#define BT_GPU_CHECK_ERROR(errorMessage)                                            \
    do {                                                                            \
        cudaError_t btGpuCheckStatus_ = cudaGetLastError();                         \
        if(cudaSuccess != btGpuCheckStatus_) {                                      \
            fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",        \
                    errorMessage, __FILE__, __LINE__,                               \
                    cudaGetErrorString(btGpuCheckStatus_));                         \
            btCuda_exit(EXIT_FAILURE);                                              \
        }                                                                           \
        btGpuCheckStatus_ = cudaThreadSynchronize();                                \
        if(cudaSuccess != btGpuCheckStatus_) {                                      \
            fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",        \
                    errorMessage, __FILE__, __LINE__,                               \
                    cudaGetErrorString(btGpuCheckStatus_));                         \
            btCuda_exit(EXIT_FAILURE);                                              \
        }                                                                           \
    } while(0)
|
||||
|
||||
|
||||
|
||||
//! Evaluate a CUDA runtime call once and abort through btCuda_exit() when the
//! returned status is not cudaSuccess. No synchronization is performed here.
//! @param call expression yielding a cudaError status
//! @note The argument is parenthesized and the status is kept in a
//!       macro-private local, so a call expression that mentions a variable
//!       named `err` still refers to the caller's variable.
#define BT_GPU_SAFE_CALL_NO_SYNC(call)                                          \
    do                                                                          \
    {                                                                           \
        cudaError btGpuNoSyncStatus_ = (call);                                  \
        if(btGpuNoSyncStatus_ != cudaSuccess)                                   \
        {                                                                       \
            fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",       \
                    __FILE__, __LINE__,                                         \
                    cudaGetErrorString(btGpuNoSyncStatus_));                    \
            btCuda_exit(EXIT_FAILURE);                                          \
        }                                                                       \
    }                                                                           \
    while(0)
|
||||
|
||||
|
||||
|
||||
//! Run a CUDA runtime call through BT_GPU_SAFE_CALL_NO_SYNC, then call
//! cudaThreadSynchronize() and abort through btCuda_exit() when the
//! synchronization reports an error.
//! @param call expression yielding a cudaError status
#define BT_GPU_SAFE_CALL(call)                                                  \
    do {                                                                        \
        BT_GPU_SAFE_CALL_NO_SYNC(call);                                         \
        cudaError btGpuSyncStatus_ = cudaThreadSynchronize();                   \
        if(cudaSuccess != btGpuSyncStatus_) {                                   \
            fprintf(stderr,"Cuda errorSync in file '%s' in line %i : %s.\n",    \
                    __FILE__, __LINE__, cudaGetErrorString(btGpuSyncStatus_));  \
            btCuda_exit(EXIT_FAILURE);                                          \
        }                                                                       \
    } while (0)
|
||||
|
||||
|
||||
|
||||
extern "C" void btCuda_exit(int val);
|
||||
|
||||
|
||||
|
||||
#endif // BTCUDADEFINES_H
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,84 +1,84 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string.h>
|
||||
|
||||
#include <GL/glut.h>
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
#include "cutil_math.h"
|
||||
#include "math_constants.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
|
||||
|
||||
|
||||
#include "btCudaDefines.h"
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
|
||||
|
||||
|
||||
// Print a prompt on stderr, wait for one character from stdin, then end the
// process with exit code val.
void btCuda_exit(int val)
{
	fputs("Press ENTER key to terminate the program\n", stderr);
	(void)getchar();	// the character itself is not used
	exit(val);
}
|
||||
|
||||
// Call cudaMalloc(devPtr, size) under BT_GPU_SAFE_CALL, which reports a
// failure on stderr and calls btCuda_exit().
void btCuda_allocateArray(void** devPtr, unsigned int size) {
	BT_GPU_SAFE_CALL( cudaMalloc(devPtr, size) );
}
|
||||
|
||||
// Call cudaFree(devPtr) under BT_GPU_SAFE_CALL, which reports a failure on
// stderr and calls btCuda_exit().
void btCuda_freeArray(void* devPtr) {
	BT_GPU_SAFE_CALL( cudaFree(devPtr) );
}
|
||||
|
||||
// Copy size bytes from device to host with cudaMemcpy
// (cudaMemcpyDeviceToHost) under BT_GPU_SAFE_CALL.
void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size) {
	BT_GPU_SAFE_CALL( cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost) );
}
|
||||
|
||||
// Copy size bytes from host to device with cudaMemcpy
// (cudaMemcpyHostToDevice) under BT_GPU_SAFE_CALL.
void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size) {
	// the destination is passed as void*; no intermediate char* cast is needed
	BT_GPU_SAFE_CALL( cudaMemcpy(device, host, size, cudaMemcpyHostToDevice) );
}
|
||||
|
||||
|
||||
// Call cudaGLRegisterBufferObject(vbo) under BT_GPU_SAFE_CALL.
void btCuda_registerGLBufferObject(unsigned int vbo) {
	BT_GPU_SAFE_CALL( cudaGLRegisterBufferObject(vbo) );
}
|
||||
|
||||
// Call cudaGLMapBufferObject for vbo under BT_GPU_SAFE_CALL and return the
// pointer it stored.
void* btCuda_mapGLBufferObject(unsigned int vbo) {
	void* mappedPtr;	// written by cudaGLMapBufferObject
	BT_GPU_SAFE_CALL( cudaGLMapBufferObject(&mappedPtr, vbo) );
	return mappedPtr;
}
|
||||
|
||||
// Call cudaGLUnmapBufferObject(vbo) under BT_GPU_SAFE_CALL.
void btCuda_unmapGLBufferObject(unsigned int vbo) {
	BT_GPU_SAFE_CALL( cudaGLUnmapBufferObject(vbo) );
}
|
||||
|
||||
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedCode.h"
|
||||
|
||||
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string.h>
|
||||
|
||||
#include <GL/glut.h>
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
#include "cutil_math.h"
|
||||
#include "math_constants.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
|
||||
|
||||
|
||||
#include "btCudaDefines.h"
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
|
||||
|
||||
|
||||
// Print a prompt on stderr, wait for one character from stdin, then end the
// process with exit code val.
void btCuda_exit(int val)
{
	fputs("Press ENTER key to terminate the program\n", stderr);
	(void)getchar();	// the character itself is not used
	exit(val);
}
|
||||
|
||||
// Call cudaMalloc(devPtr, size) under BT_GPU_SAFE_CALL, which reports a
// failure on stderr and calls btCuda_exit().
void btCuda_allocateArray(void** devPtr, unsigned int size) {
	BT_GPU_SAFE_CALL( cudaMalloc(devPtr, size) );
}
|
||||
|
||||
// Call cudaFree(devPtr) under BT_GPU_SAFE_CALL, which reports a failure on
// stderr and calls btCuda_exit().
void btCuda_freeArray(void* devPtr) {
	BT_GPU_SAFE_CALL( cudaFree(devPtr) );
}
|
||||
|
||||
// Copy size bytes from device to host with cudaMemcpy
// (cudaMemcpyDeviceToHost) under BT_GPU_SAFE_CALL.
void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size) {
	BT_GPU_SAFE_CALL( cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost) );
}
|
||||
|
||||
// Copy size bytes from host to device with cudaMemcpy
// (cudaMemcpyHostToDevice) under BT_GPU_SAFE_CALL.
void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size) {
	// the destination is passed as void*; no intermediate char* cast is needed
	BT_GPU_SAFE_CALL( cudaMemcpy(device, host, size, cudaMemcpyHostToDevice) );
}
|
||||
|
||||
|
||||
// Call cudaGLRegisterBufferObject(vbo) under BT_GPU_SAFE_CALL.
void btCuda_registerGLBufferObject(unsigned int vbo) {
	BT_GPU_SAFE_CALL( cudaGLRegisterBufferObject(vbo) );
}
|
||||
|
||||
// Call cudaGLMapBufferObject for vbo under BT_GPU_SAFE_CALL and return the
// pointer it stored.
void* btCuda_mapGLBufferObject(unsigned int vbo) {
	void* mappedPtr;	// written by cudaGLMapBufferObject
	BT_GPU_SAFE_CALL( cudaGLMapBufferObject(&mappedPtr, vbo) );
	return mappedPtr;
}
|
||||
|
||||
// Call cudaGLUnmapBufferObject(vbo) under BT_GPU_SAFE_CALL.
void btCuda_unmapGLBufferObject(unsigned int vbo) {
	BT_GPU_SAFE_CALL( cudaGLUnmapBufferObject(vbo) );
}
|
||||
|
||||
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedCode.h"
|
||||
|
||||
|
||||
|
||||
@@ -1,42 +1,42 @@
|
||||
/*
|
||||
Impulse based Rigid body simulation using CUDA
|
||||
Copyright (c) 2007 Takahiro Harada http://www.iii.u-tokyo.ac.jp/~takahiroharada/projects/impulseCUDA.html
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string.h>
|
||||
|
||||
#include "cutil_math.h"
|
||||
#include "math_constants.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
|
||||
|
||||
#include "btCudaDefines.h"
|
||||
|
||||
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
|
||||
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedTypes.h"
|
||||
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedDefs.h"
|
||||
|
||||
|
||||
|
||||
texture<float4, 1, cudaReadModeElementType> posTex;
|
||||
|
||||
|
||||
|
||||
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h"
|
||||
|
||||
/*
|
||||
Impulse based Rigid body simulation using CUDA
|
||||
Copyright (c) 2007 Takahiro Harada http://www.iii.u-tokyo.ac.jp/~takahiroharada/projects/impulseCUDA.html
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string.h>
|
||||
|
||||
#include "cutil_math.h"
|
||||
#include "math_constants.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
|
||||
|
||||
#include "btCudaDefines.h"
|
||||
|
||||
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
|
||||
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedTypes.h"
|
||||
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedDefs.h"
|
||||
|
||||
|
||||
|
||||
texture<float4, 1, cudaReadModeElementType> posTex;
|
||||
|
||||
|
||||
|
||||
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h"
|
||||
|
||||
|
||||
@@ -1,46 +1,46 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string.h>
|
||||
|
||||
#include "../../Extras/CUDA/cutil_math.h"
|
||||
#include "math_constants.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "../../Extras/CUDA/btCudaDefines.h"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
|
||||
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedTypes.h"
|
||||
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedDefs.h"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
texture<float4, 1, cudaReadModeElementType> posTex;
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedCode.h"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string.h>
|
||||
|
||||
#include "../../Extras/CUDA/cutil_math.h"
|
||||
#include "math_constants.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "../../Extras/CUDA/btCudaDefines.h"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
|
||||
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedTypes.h"
|
||||
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedDefs.h"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
texture<float4, 1, cudaReadModeElementType> posTex;
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedCode.h"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -1,86 +1,86 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
#ifndef CUTIL_GL_ERROR
|
||||
#define CUTIL_GL_ERROR
|
||||
|
||||
/* CUda UTility Library */
|
||||
|
||||
// includes, system
|
||||
#ifdef _WIN32
// windows.h honors WIN32_LEAN_AND_MEAN; WINDOWS_LEAN_AND_MEAN is not a macro
// it recognizes. The historical spelling stays defined in case other code
// tests for it.
#  define WINDOWS_LEAN_AND_MEAN
#  ifndef WIN32_LEAN_AND_MEAN
#    define WIN32_LEAN_AND_MEAN
#  endif
#  include <windows.h>
#  include <stdlib.h>
// drop the min/max macros so they cannot clash with functions of that name
#  undef min
#  undef max
#endif
|
||||
|
||||
// includes, graphics
|
||||
#if defined (__APPLE__) || defined(MACOSX)
|
||||
#include <OpenGL/gl.h>
|
||||
#include <OpenGL/glu.h>
|
||||
#else
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glu.h>
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Check for OpenGL error
|
||||
//! @return CUTTrue if no GL error has been encountered, otherwise 0
|
||||
//! @param file __FILE__ macro
|
||||
//! @param line __LINE__ macro
|
||||
//! @note The GL error is listed on stderr
|
||||
//! @note This function should be used via the CHECK_ERROR_GL() macro
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
CUTBoolean CUTIL_API
|
||||
cutCheckErrorGL( const char* file, const int line)
|
||||
{
|
||||
CUTBoolean ret_val = CUTTrue;
|
||||
|
||||
// check for error
|
||||
GLenum gl_error = glGetError();
|
||||
if (gl_error != GL_NO_ERROR)
|
||||
{
|
||||
fprintf(stderr, "GL Error in file '%s' in line %d :\n", file, line);
|
||||
fprintf(stderr, "%s\n", gluErrorString(gl_error));
|
||||
ret_val = CUTFalse;
|
||||
}
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
#ifdef _DEBUG

//! Terminate the program with EXIT_FAILURE when cutCheckErrorGL() reports a
//! pending OpenGL error. Wrapped in do/while(0) so the macro behaves as a
//! single statement inside an unbraced if/else; use it with a trailing ';'.
#define CUT_CHECK_ERROR_GL()                                               \
    do {                                                                   \
        if( CUTFalse == cutCheckErrorGL( __FILE__, __LINE__)) {            \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while(0)

#else // _DEBUG

//! Without _DEBUG the check is skipped; the macro still expands to a
//! statement so that call sites compile unchanged.
#define CUT_CHECK_ERROR_GL() do { } while(0)

#endif // _DEBUG
|
||||
|
||||
#endif // CUTIL_GL_ERROR
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
#ifndef CUTIL_GL_ERROR
|
||||
#define CUTIL_GL_ERROR
|
||||
|
||||
/* CUda UTility Library */
|
||||
|
||||
// includes, system
|
||||
#ifdef _WIN32
// windows.h honors WIN32_LEAN_AND_MEAN; WINDOWS_LEAN_AND_MEAN is not a macro
// it recognizes. The historical spelling stays defined in case other code
// tests for it.
#  define WINDOWS_LEAN_AND_MEAN
#  ifndef WIN32_LEAN_AND_MEAN
#    define WIN32_LEAN_AND_MEAN
#  endif
#  include <windows.h>
#  include <stdlib.h>
// drop the min/max macros so they cannot clash with functions of that name
#  undef min
#  undef max
#endif
|
||||
|
||||
// includes, graphics
|
||||
#if defined (__APPLE__) || defined(MACOSX)
|
||||
#include <OpenGL/gl.h>
|
||||
#include <OpenGL/glu.h>
|
||||
#else
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glu.h>
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Check for OpenGL error
|
||||
//! @return CUTTrue if no GL error has been encountered, otherwise 0
|
||||
//! @param file __FILE__ macro
|
||||
//! @param line __LINE__ macro
|
||||
//! @note The GL error is listed on stderr
|
||||
//! @note This function should be used via the CHECK_ERROR_GL() macro
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
CUTBoolean CUTIL_API
|
||||
cutCheckErrorGL( const char* file, const int line)
|
||||
{
|
||||
CUTBoolean ret_val = CUTTrue;
|
||||
|
||||
// check for error
|
||||
GLenum gl_error = glGetError();
|
||||
if (gl_error != GL_NO_ERROR)
|
||||
{
|
||||
fprintf(stderr, "GL Error in file '%s' in line %d :\n", file, line);
|
||||
fprintf(stderr, "%s\n", gluErrorString(gl_error));
|
||||
ret_val = CUTFalse;
|
||||
}
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
#ifdef _DEBUG

//! Terminate the program with EXIT_FAILURE when cutCheckErrorGL() reports a
//! pending OpenGL error. Wrapped in do/while(0) so the macro behaves as a
//! single statement inside an unbraced if/else; use it with a trailing ';'.
#define CUT_CHECK_ERROR_GL()                                               \
    do {                                                                   \
        if( CUTFalse == cutCheckErrorGL( __FILE__, __LINE__)) {            \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while(0)

#else // _DEBUG

//! Without _DEBUG the check is skipped; the macro still expands to a
//! statement so that call sites compile unchanged.
#define CUT_CHECK_ERROR_GL() do { } while(0)

#endif // _DEBUG
|
||||
|
||||
#endif // CUTIL_GL_ERROR
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,79 +1,79 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
/* Radixsort project with key/value and arbitrary dataset size support
|
||||
* which demonstrates the use of CUDA in a multi phase sorting
|
||||
* computation.
|
||||
* Host code.
|
||||
*/
|
||||
|
||||
#include "radixsort.cuh"
|
||||
#include "radixsort_kernel.cu"
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
//! Perform a radix sort of key/value pairs using two buffers
//!
//! One pass is launched for each shift = 0, RADIX, 2*RADIX, ... while
//! shift < bits. A pass launches RadixSum, RadixPrefixSum and
//! RadixAddOffsetsAndShuffle, then exchanges the roles of the two buffers.
//!
//! @param pData0    array holding the data to sort
//! @param pData1    additional array to allow ping pong computation
//! @param elements  number of elements to sort
//! @param bits      upper bound of the shift loop (number of key bits handled)
//! @note NOTE(review): the buffers are exchanged after every pass, so the
//!       result presumably lands in the caller's pData0 only after an even
//!       number of passes -- confirm against radixsort_kernel.cu and callers.
////////////////////////////////////////////////////////////////////////////////
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits)
{
    // Pad the element count up to the next multiple of 3072 (described in the
    // original comment as the total number of threads).
    const int remainder = elements % 3072;
    const uint elements_rounded_to_3072 = elements + (3072 - remainder) % 3072;

    KeyValuePair* source = pData0;
    KeyValuePair* target = pData1;

    // One round of radix sorting per RADIX-sized step of the key
    uint shift = 0;
    while (shift < bits)
    {
        // Generate per radix group sums radix counts across a radix group
        RadixSum<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, GRFSIZE>>>(source, elements, elements_rounded_to_3072, shift);

        // Prefix sum in radix groups, and then between groups throughout a block
        RadixPrefixSum<<<PREFIX_NUM_BLOCKS, PREFIX_NUM_THREADS_PER_BLOCK, PREFIX_GRFSIZE>>>();

        // Sum the block offsets and then shuffle data into bins
        RadixAddOffsetsAndShuffle<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, SHUFFLE_GRFSIZE>>>(source, target, elements, elements_rounded_to_3072, shift);

        // Exchange data pointers for the next round
        KeyValuePair* const previousSource = source;
        source = target;
        target = previousSource;

        shift += RADIX;
    }
}
|
||||
|
||||
}
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
/* Radixsort project with key/value and arbitrary dataset size support
|
||||
* which demonstrates the use of CUDA in a multi phase sorting
|
||||
* computation.
|
||||
* Host code.
|
||||
*/
|
||||
|
||||
#include "radixsort.cuh"
|
||||
#include "radixsort_kernel.cu"
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
//! Perform a radix sort of key/value pairs using two buffers
//!
//! One pass is launched for each shift = 0, RADIX, 2*RADIX, ... while
//! shift < bits. A pass launches RadixSum, RadixPrefixSum and
//! RadixAddOffsetsAndShuffle, then exchanges the roles of the two buffers.
//!
//! @param pData0    array holding the data to sort
//! @param pData1    additional array to allow ping pong computation
//! @param elements  number of elements to sort
//! @param bits      upper bound of the shift loop (number of key bits handled)
//! @note NOTE(review): the buffers are exchanged after every pass, so the
//!       result presumably lands in the caller's pData0 only after an even
//!       number of passes -- confirm against radixsort_kernel.cu and callers.
////////////////////////////////////////////////////////////////////////////////
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits)
{
    // Pad the element count up to the next multiple of 3072 (described in the
    // original comment as the total number of threads).
    const int remainder = elements % 3072;
    const uint elements_rounded_to_3072 = elements + (3072 - remainder) % 3072;

    KeyValuePair* source = pData0;
    KeyValuePair* target = pData1;

    // One round of radix sorting per RADIX-sized step of the key
    uint shift = 0;
    while (shift < bits)
    {
        // Generate per radix group sums radix counts across a radix group
        RadixSum<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, GRFSIZE>>>(source, elements, elements_rounded_to_3072, shift);

        // Prefix sum in radix groups, and then between groups throughout a block
        RadixPrefixSum<<<PREFIX_NUM_BLOCKS, PREFIX_NUM_THREADS_PER_BLOCK, PREFIX_GRFSIZE>>>();

        // Sum the block offsets and then shuffle data into bins
        RadixAddOffsetsAndShuffle<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, SHUFFLE_GRFSIZE>>>(source, target, elements, elements_rounded_to_3072, shift);

        // Exchange data pointers for the next round
        KeyValuePair* const previousSource = source;
        source = target;
        target = previousSource;

        shift += RADIX;
    }
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,63 +1,63 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
/* Radixsort project which demonstrates the use of CUDA in a multi phase
|
||||
* sorting computation.
|
||||
* Type definitions.
|
||||
*/
|
||||
|
||||
#ifndef _RADIXSORT_H_
|
||||
#define _RADIXSORT_H_
|
||||
|
||||
#include <host_defines.h>
|
||||
|
||||
#define SYNCIT __syncthreads()
|
||||
|
||||
// Set to 1 to use 16 bit (ushort) keys/values; 0 uses uint keys/values
|
||||
#define SIXTEEN 0
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned short ushort;
|
||||
|
||||
// Key/value record handled by RadixSort. The SIXTEEN switch picks the field
// width; each variant is spelled out in full so that both preprocessor
// branches are complete declarations.
#if SIXTEEN

// ushort key and value, 4 byte aligned
typedef struct __align__(4) {
    ushort key;
    ushort value;
} KeyValuePair;

#else

// uint key and value, 8 byte aligned
typedef struct __align__(8) {
    uint key;
    uint value;
} KeyValuePair;

#endif
|
||||
|
||||
extern "C" {
|
||||
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits);
|
||||
}
|
||||
|
||||
#endif // #ifndef _RADIXSORT_H_
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
/* Radixsort project which demonstrates the use of CUDA in a multi phase
|
||||
* sorting computation.
|
||||
* Type definitions.
|
||||
*/
|
||||
|
||||
#ifndef _RADIXSORT_H_
|
||||
#define _RADIXSORT_H_
|
||||
|
||||
#include <host_defines.h>
|
||||
|
||||
#define SYNCIT __syncthreads()
|
||||
|
||||
// Set to 1 to use 16 bit (ushort) keys/values; 0 uses uint keys/values
|
||||
#define SIXTEEN 0
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned short ushort;
|
||||
|
||||
// Key/value record handled by RadixSort. The SIXTEEN switch picks the field
// width; each variant is spelled out in full so that both preprocessor
// branches are complete declarations.
#if SIXTEEN

// ushort key and value, 4 byte aligned
typedef struct __align__(4) {
    ushort key;
    ushort value;
} KeyValuePair;

#else

// uint key and value, 8 byte aligned
typedef struct __align__(8) {
    uint key;
    uint value;
} KeyValuePair;

#endif
|
||||
|
||||
extern "C" {
|
||||
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits);
|
||||
}
|
||||
|
||||
#endif // #ifndef _RADIXSORT_H_
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user