fix: some files didn't have the svn:eol-style native property set yet

This commit is contained in:
erwin.coumans
2010-03-06 15:23:36 +00:00
parent 4fd48ac691
commit 81f04a4d48
641 changed files with 301123 additions and 301123 deletions

View File

@@ -1,207 +1,207 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "LinearMath/btAlignedAllocator.h"
#include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
#include "btCudaBroadphase.h"
#include "radixsort.cuh"
#define BT_GPU_PREF(func) btCuda_##func
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h"
#undef BT_GPU_PREF
extern "C" void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams);
#include <stdio.h>
/// Constructs the CUDA broadphase.
/// Every argument is forwarded unchanged to the btGpu3DGridBroadphase base
/// constructor; afterwards the GPU-side buffers are created by _initialize().
btCudaBroadphase::btCudaBroadphase(
    btOverlappingPairCache* overlappingPairCache,
    const btVector3& worldAabbMin,
    const btVector3& worldAabbMax,
    int gridSizeX, int gridSizeY, int gridSizeZ,
    int maxSmallProxies,
    int maxLargeProxies,
    int maxPairsPerSmallProxy,
    int maxSmallProxiesPerCell)
    : btGpu3DGridBroadphase(overlappingPairCache,
                            worldAabbMin, worldAabbMax,
                            gridSizeX, gridSizeY, gridSizeZ,
                            maxSmallProxies, maxLargeProxies,
                            maxPairsPerSmallProxy, maxSmallProxiesPerCell)
{
    // _initialize() sizes its buffers from members (m_maxHandles, m_params, ...)
    // that are not declared in this class - presumably set up by the base
    // constructor above; confirm in btGpu3DGridBroadphase.
    _initialize();
}
/// Releases the device buffers created by _initialize().
btCudaBroadphase::~btCudaBroadphase()
{
    // Note carried over from the original source: btSimpleBroadphase frees the
    // memory of btSortedOverlappingPairCache (because of m_ownsPairCache), so
    // this destructor only deals with the device-side arrays.
    assert(m_bInitialized);
    _finalize();
}
/// Allocates every device buffer used by the CUDA broadphase and uploads the
/// initial host contents of the pair buffer and the pair start/current table.
void btCudaBroadphase::_initialize()
{
    // Buffer sizes in bytes, named once so that each allocation and its
    // matching upload are guaranteed to use the same value.
    const size_t hashBytes          = m_maxHandles * 2 * sizeof(unsigned int);
    const size_t cellStartBytes     = m_params.m_numCells * sizeof(unsigned int);
    const size_t pairBuffBytes      = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
    const size_t pairStartCurrBytes = (m_maxHandles * 2 + 1) * sizeof(unsigned int);
    const size_t pairScanBytes      = (m_maxHandles + 1) * sizeof(unsigned int);

    // Two equally sized hash buffers (both are handed to RadixSort in sortHash()).
    btCuda_allocateArray((void**)&m_dBodiesHash[0], hashBytes);
    btCuda_allocateArray((void**)&m_dBodiesHash[1], hashBytes);

    btCuda_allocateArray((void**)&m_dCellStart, cellStartBytes);

    // Pair buffer, seeded from the host copy.
    btCuda_allocateArray((void**)&m_dPairBuff, pairBuffBytes);
    btCuda_copyArrayToDevice(m_dPairBuff, m_hPairBuff, pairBuffBytes); // needed?

    // Pair start/current table, seeded from the host copy.
    btCuda_allocateArray((void**)&m_dPairBuffStartCurr, pairStartCurrBytes);
    btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, pairStartCurrBytes);

    // AABB storage covers small and large handles, two bt3DGrid3F1U entries each.
    unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
    btCuda_allocateArray((void**)&m_dAABB, numAABB * sizeof(bt3DGrid3F1U) * 2);

    btCuda_allocateArray((void**)&m_dPairScan, pairScanBytes);

    // Output pair buffer has the same size as the pair buffer.
    btCuda_allocateArray((void**)&m_dPairOut, pairBuffBytes);
}
/// Frees all device buffers that _initialize() allocated.
void btCudaBroadphase::_finalize()
{
    assert(m_bInitialized);

    // The buffers are listed in the exact order in which they are released.
    void* const deviceBuffers[] = {
        m_dBodiesHash[0],
        m_dBodiesHash[1],
        m_dCellStart,
        m_dPairBuffStartCurr,
        m_dAABB,
        m_dPairBuff,
        m_dPairScan,
        m_dPairOut
    };
    const int bufferCount = (int)(sizeof(deviceBuffers) / sizeof(deviceBuffers[0]));

    for (int i = 0; i < bufferCount; ++i)
    {
        btCuda_freeArray(deviceBuffers[i]);
    }
}
//
// overrides for CUDA version
//
/// Runs the base-class AABB preparation, then uploads the host AABB array
/// (m_hAABB) for all small and large handles into m_dAABB.
void btCudaBroadphase::prepareAABB()
{
    btGpu3DGridBroadphase::prepareAABB();

    // Two bt3DGrid3F1U entries are transferred per handle
    // (presumably the AABB min and max - TODO confirm against the base class).
    btCuda_copyArrayToDevice(m_dAABB, m_hAABB,
                             sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles));
}
/// Hands the host-side grid parameters to btCuda_setParameters().
void btCudaBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
{
    btCuda_setParameters(hostParams);
}
/// Invokes btCuda_calcHashAABB on the device AABBs; the first hash buffer
/// (m_dBodiesHash[0]) and the current handle count are passed along.
void btCudaBroadphase::calcHashAABB()
{
    BT_PROFILE("btCuda_calcHashAABB"); // profiling label for this stage

    btCuda_calcHashAABB(m_dAABB, m_dBodiesHash[0], m_numHandles);

    // Debug aid (disabled): read the hash buffer back to the host for inspection.
    //   btCuda_copyArrayFromDevice((void*)m_hBodiesHash, (void*)m_dBodiesHash[0], sizeof(unsigned int) * 2 * m_numHandles);
}
void btCudaBroadphase::sortHash()
{
BT_PROFILE("RadixSort-- CUDA");
RadixSort((KeyValuePair*)m_dBodiesHash[0], (KeyValuePair*)m_dBodiesHash[1], m_numHandles, 32);
return;
}
/// Invokes btCuda_findCellStart with the first hash buffer, the cell-start
/// buffer, the current handle count and the number of grid cells.
void btCudaBroadphase::findCellStart()
{
    BT_PROFILE("btCuda_findCellStart"); // profiling label for this stage

    btCuda_findCellStart(m_dBodiesHash[0], m_dCellStart, m_numHandles, m_params.m_numCells);

    // Debug aid (disabled): read the device results back to the host for inspection.
    //   btCuda_copyArrayFromDevice((void*)m_hBodiesHash, (void*)m_dBodiesHash[0], sizeof(unsigned int) * 2 * m_numHandles);
    //   btCuda_copyArrayFromDevice((void*)m_hCellStart, (void*)m_dCellStart, sizeof(unsigned int) * m_params.m_numCells);
}
/// Invokes btCuda_findOverlappingPairs with the device AABBs, hash and
/// cell-start buffers, and the pair buffer with its start/current table.
void btCudaBroadphase::findOverlappingPairs()
{
    BT_PROFILE("btCuda_findOverlappingPairs"); // profiling label for this stage

    btCuda_findOverlappingPairs(m_dAABB,
                                m_dBodiesHash[0],
                                m_dCellStart,
                                m_dPairBuff,
                                m_dPairBuffStartCurr,
                                m_numHandles);
}
/// Invokes btCuda_findPairsLarge; compared with findOverlappingPairs() the
/// call additionally receives the number of large handles.
void btCudaBroadphase::findPairsLarge()
{
    BT_PROFILE("btCuda_findPairsLarge"); // profiling label for this stage

    btCuda_findPairsLarge(m_dAABB,
                          m_dBodiesHash[0],
                          m_dCellStart,
                          m_dPairBuff,
                          m_dPairBuffStartCurr,
                          m_numHandles,
                          m_numLargeHandles);
}
/// Invokes btCuda_computePairCacheChanges with the pair buffer, its
/// start/current table, the scan buffer and the device AABBs.
void btCudaBroadphase::computePairCacheChanges()
{
    BT_PROFILE("btCuda_computePairCacheChanges"); // profiling label for this stage

    btCuda_computePairCacheChanges(m_dPairBuff,
                                   m_dPairBuffStartCurr,
                                   m_dPairScan,
                                   m_dAABB,
                                   m_numHandles);
}
/// Performs the pair-buffer scan on the host: the scan array is downloaded,
/// processed by the base-class implementation, and uploaded again.
void btCudaBroadphase::scanOverlappingPairBuff()
{
    // Device -> host: (m_numHandles + 1) entries of the scan array.
    btCuda_copyArrayFromDevice(m_hPairScan, m_dPairScan,
                               sizeof(unsigned int)*(m_numHandles + 1));

    // The base class works on the host copy (m_hPairScan).
    btGpu3DGridBroadphase::scanOverlappingPairBuff();

    // Host -> device: write the processed array back.
    btCuda_copyArrayToDevice(m_dPairScan, m_hPairScan,
                             sizeof(unsigned int)*(m_numHandles + 1));
}
/// Invokes btCuda_squeezeOverlappingPairBuff, then downloads the resulting
/// output pairs from m_dPairOut into m_hPairOut.
void btCudaBroadphase::squeezeOverlappingPairBuff()
{
    BT_PROFILE("btCuda_squeezeOverlappingPairBuff"); // profiling label for this stage

    btCuda_squeezeOverlappingPairBuff(m_dPairBuff,
                                      m_dPairBuffStartCurr,
                                      m_dPairScan,
                                      m_dPairOut,
                                      m_dAABB,
                                      m_numHandles);

    // The element count to download is the last entry of the host scan array,
    // m_hPairScan[m_numHandles] (the host copy refreshed in scanOverlappingPairBuff()).
    btCuda_copyArrayFromDevice(m_hPairOut, m_dPairOut,
                               sizeof(unsigned int) * m_hPairScan[m_numHandles]);
}
/// Resets the base-class pool, then re-uploads the host pair start/current
/// table so the device copy matches it.
void btCudaBroadphase::resetPool(btDispatcher* dispatcher)
{
    btGpu3DGridBroadphase::resetPool(dispatcher);

    // Same size as the allocation made in _initialize().
    btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr,
                             (m_maxHandles * 2 + 1) * sizeof(unsigned int));
}
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "LinearMath/btAlignedAllocator.h"
#include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
#include "btCudaBroadphase.h"
#include "radixsort.cuh"
#define BT_GPU_PREF(func) btCuda_##func
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h"
#undef BT_GPU_PREF
extern "C" void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams);
#include <stdio.h>
/// Constructs the CUDA broadphase.
/// Every argument is forwarded unchanged to the btGpu3DGridBroadphase base
/// constructor; afterwards the GPU-side buffers are created by _initialize().
btCudaBroadphase::btCudaBroadphase(
    btOverlappingPairCache* overlappingPairCache,
    const btVector3& worldAabbMin,
    const btVector3& worldAabbMax,
    int gridSizeX, int gridSizeY, int gridSizeZ,
    int maxSmallProxies,
    int maxLargeProxies,
    int maxPairsPerSmallProxy,
    int maxSmallProxiesPerCell)
    : btGpu3DGridBroadphase(overlappingPairCache,
                            worldAabbMin, worldAabbMax,
                            gridSizeX, gridSizeY, gridSizeZ,
                            maxSmallProxies, maxLargeProxies,
                            maxPairsPerSmallProxy, maxSmallProxiesPerCell)
{
    // _initialize() sizes its buffers from members (m_maxHandles, m_params, ...)
    // that are not declared in this class - presumably set up by the base
    // constructor above; confirm in btGpu3DGridBroadphase.
    _initialize();
}
/// Releases the device buffers created by _initialize().
btCudaBroadphase::~btCudaBroadphase()
{
    // Note carried over from the original source: btSimpleBroadphase frees the
    // memory of btSortedOverlappingPairCache (because of m_ownsPairCache), so
    // this destructor only deals with the device-side arrays.
    assert(m_bInitialized);
    _finalize();
}
/// Allocates every device buffer used by the CUDA broadphase and uploads the
/// initial host contents of the pair buffer and the pair start/current table.
void btCudaBroadphase::_initialize()
{
    // Buffer sizes in bytes, named once so that each allocation and its
    // matching upload are guaranteed to use the same value.
    const size_t hashBytes          = m_maxHandles * 2 * sizeof(unsigned int);
    const size_t cellStartBytes     = m_params.m_numCells * sizeof(unsigned int);
    const size_t pairBuffBytes      = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
    const size_t pairStartCurrBytes = (m_maxHandles * 2 + 1) * sizeof(unsigned int);
    const size_t pairScanBytes      = (m_maxHandles + 1) * sizeof(unsigned int);

    // Two equally sized hash buffers (both are handed to RadixSort in sortHash()).
    btCuda_allocateArray((void**)&m_dBodiesHash[0], hashBytes);
    btCuda_allocateArray((void**)&m_dBodiesHash[1], hashBytes);

    btCuda_allocateArray((void**)&m_dCellStart, cellStartBytes);

    // Pair buffer, seeded from the host copy.
    btCuda_allocateArray((void**)&m_dPairBuff, pairBuffBytes);
    btCuda_copyArrayToDevice(m_dPairBuff, m_hPairBuff, pairBuffBytes); // needed?

    // Pair start/current table, seeded from the host copy.
    btCuda_allocateArray((void**)&m_dPairBuffStartCurr, pairStartCurrBytes);
    btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, pairStartCurrBytes);

    // AABB storage covers small and large handles, two bt3DGrid3F1U entries each.
    unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
    btCuda_allocateArray((void**)&m_dAABB, numAABB * sizeof(bt3DGrid3F1U) * 2);

    btCuda_allocateArray((void**)&m_dPairScan, pairScanBytes);

    // Output pair buffer has the same size as the pair buffer.
    btCuda_allocateArray((void**)&m_dPairOut, pairBuffBytes);
}
/// Frees all device buffers that _initialize() allocated.
void btCudaBroadphase::_finalize()
{
    assert(m_bInitialized);

    // The buffers are listed in the exact order in which they are released.
    void* const deviceBuffers[] = {
        m_dBodiesHash[0],
        m_dBodiesHash[1],
        m_dCellStart,
        m_dPairBuffStartCurr,
        m_dAABB,
        m_dPairBuff,
        m_dPairScan,
        m_dPairOut
    };
    const int bufferCount = (int)(sizeof(deviceBuffers) / sizeof(deviceBuffers[0]));

    for (int i = 0; i < bufferCount; ++i)
    {
        btCuda_freeArray(deviceBuffers[i]);
    }
}
//
// overrides for CUDA version
//
/// Runs the base-class AABB preparation, then uploads the host AABB array
/// (m_hAABB) for all small and large handles into m_dAABB.
void btCudaBroadphase::prepareAABB()
{
    btGpu3DGridBroadphase::prepareAABB();

    // Two bt3DGrid3F1U entries are transferred per handle
    // (presumably the AABB min and max - TODO confirm against the base class).
    btCuda_copyArrayToDevice(m_dAABB, m_hAABB,
                             sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles));
}
/// Hands the host-side grid parameters to btCuda_setParameters().
void btCudaBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
{
    btCuda_setParameters(hostParams);
}
/// Invokes btCuda_calcHashAABB on the device AABBs; the first hash buffer
/// (m_dBodiesHash[0]) and the current handle count are passed along.
void btCudaBroadphase::calcHashAABB()
{
    BT_PROFILE("btCuda_calcHashAABB"); // profiling label for this stage

    btCuda_calcHashAABB(m_dAABB, m_dBodiesHash[0], m_numHandles);

    // Debug aid (disabled): read the hash buffer back to the host for inspection.
    //   btCuda_copyArrayFromDevice((void*)m_hBodiesHash, (void*)m_dBodiesHash[0], sizeof(unsigned int) * 2 * m_numHandles);
}
void btCudaBroadphase::sortHash()
{
BT_PROFILE("RadixSort-- CUDA");
RadixSort((KeyValuePair*)m_dBodiesHash[0], (KeyValuePair*)m_dBodiesHash[1], m_numHandles, 32);
return;
}
/// Invokes btCuda_findCellStart with the first hash buffer, the cell-start
/// buffer, the current handle count and the number of grid cells.
void btCudaBroadphase::findCellStart()
{
    BT_PROFILE("btCuda_findCellStart"); // profiling label for this stage

    btCuda_findCellStart(m_dBodiesHash[0], m_dCellStart, m_numHandles, m_params.m_numCells);

    // Debug aid (disabled): read the device results back to the host for inspection.
    //   btCuda_copyArrayFromDevice((void*)m_hBodiesHash, (void*)m_dBodiesHash[0], sizeof(unsigned int) * 2 * m_numHandles);
    //   btCuda_copyArrayFromDevice((void*)m_hCellStart, (void*)m_dCellStart, sizeof(unsigned int) * m_params.m_numCells);
}
/// Invokes btCuda_findOverlappingPairs with the device AABBs, hash and
/// cell-start buffers, and the pair buffer with its start/current table.
void btCudaBroadphase::findOverlappingPairs()
{
    BT_PROFILE("btCuda_findOverlappingPairs"); // profiling label for this stage

    btCuda_findOverlappingPairs(m_dAABB,
                                m_dBodiesHash[0],
                                m_dCellStart,
                                m_dPairBuff,
                                m_dPairBuffStartCurr,
                                m_numHandles);
}
/// Invokes btCuda_findPairsLarge; compared with findOverlappingPairs() the
/// call additionally receives the number of large handles.
void btCudaBroadphase::findPairsLarge()
{
    BT_PROFILE("btCuda_findPairsLarge"); // profiling label for this stage

    btCuda_findPairsLarge(m_dAABB,
                          m_dBodiesHash[0],
                          m_dCellStart,
                          m_dPairBuff,
                          m_dPairBuffStartCurr,
                          m_numHandles,
                          m_numLargeHandles);
}
/// Invokes btCuda_computePairCacheChanges with the pair buffer, its
/// start/current table, the scan buffer and the device AABBs.
void btCudaBroadphase::computePairCacheChanges()
{
    BT_PROFILE("btCuda_computePairCacheChanges"); // profiling label for this stage

    btCuda_computePairCacheChanges(m_dPairBuff,
                                   m_dPairBuffStartCurr,
                                   m_dPairScan,
                                   m_dAABB,
                                   m_numHandles);
}
/// Performs the pair-buffer scan on the host: the scan array is downloaded,
/// processed by the base-class implementation, and uploaded again.
void btCudaBroadphase::scanOverlappingPairBuff()
{
    // Device -> host: (m_numHandles + 1) entries of the scan array.
    btCuda_copyArrayFromDevice(m_hPairScan, m_dPairScan,
                               sizeof(unsigned int)*(m_numHandles + 1));

    // The base class works on the host copy (m_hPairScan).
    btGpu3DGridBroadphase::scanOverlappingPairBuff();

    // Host -> device: write the processed array back.
    btCuda_copyArrayToDevice(m_dPairScan, m_hPairScan,
                             sizeof(unsigned int)*(m_numHandles + 1));
}
/// Invokes btCuda_squeezeOverlappingPairBuff, then downloads the resulting
/// output pairs from m_dPairOut into m_hPairOut.
void btCudaBroadphase::squeezeOverlappingPairBuff()
{
    BT_PROFILE("btCuda_squeezeOverlappingPairBuff"); // profiling label for this stage

    btCuda_squeezeOverlappingPairBuff(m_dPairBuff,
                                      m_dPairBuffStartCurr,
                                      m_dPairScan,
                                      m_dPairOut,
                                      m_dAABB,
                                      m_numHandles);

    // The element count to download is the last entry of the host scan array,
    // m_hPairScan[m_numHandles] (the host copy refreshed in scanOverlappingPairBuff()).
    btCuda_copyArrayFromDevice(m_hPairOut, m_dPairOut,
                               sizeof(unsigned int) * m_hPairScan[m_numHandles]);
}
/// Resets the base-class pool, then re-uploads the host pair start/current
/// table so the device copy matches it.
void btCudaBroadphase::resetPool(btDispatcher* dispatcher)
{
    btGpu3DGridBroadphase::resetPool(dispatcher);

    // Same size as the allocation made in _initialize().
    btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr,
                             (m_maxHandles * 2 + 1) * sizeof(unsigned int));
}

View File

@@ -1,74 +1,74 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cstdlib>
#include <cstdio>
#include <string.h>
#include "cutil_math.h"
#include "math_constants.h"
#include <vector_types.h>
#include "btCudaDefines.h"
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h"
// Reinterprets the storage of a float4 (the value a tex1Dfetch on a float4
// texture yields, see BT_GPU_FETCH) as a bt3DGrid3F1U and returns it by value.
__device__ inline bt3DGrid3F1U tex_fetch3F1U(float4 a)
{
    bt3DGrid3F1U* asGridEntry = (bt3DGrid3F1U*)(&a);
    return *asGridEntry;
}
// Fatal-error handler invoked by the BT_GPU_* error-checking macros.
void btCuda_exit(int val);
// 1D texture references. Their names follow the <array>Tex pattern that the
// BT_GPU_FETCH / BT_GPU_FETCH4 macros build by token pasting (t##Tex).
texture<uint2, 1, cudaReadModeElementType> particleHashTex;
texture<uint, 1, cudaReadModeElementType> cellStartTex;
texture<float4, 1, cudaReadModeElementType> pAABBTex;
// Broadphase parameters in constant memory; written by btCuda_setParameters().
__constant__ bt3DGridBroadphaseParams params;
extern "C"
{
    // Uploads *hostParams into the __constant__ symbol `params`.
    // The call is wrapped in BT_GPU_SAFE_CALL, so a CUDA failure is reported
    // on stderr and ends in btCuda_exit().
    void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams)
    {
        // copy parameters to constant memory
        const size_t paramBytes = sizeof(bt3DGridBroadphaseParams);
        BT_GPU_SAFE_CALL(cudaMemcpyToSymbol(params, hostParams, paramBytes));
    }
} // extern "C"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h"
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cstdlib>
#include <cstdio>
#include <string.h>
#include "cutil_math.h"
#include "math_constants.h"
#include <vector_types.h>
#include "btCudaDefines.h"
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h"
// Reinterprets the storage of a float4 (the value a tex1Dfetch on a float4
// texture yields, see BT_GPU_FETCH) as a bt3DGrid3F1U and returns it by value.
__device__ inline bt3DGrid3F1U tex_fetch3F1U(float4 a)
{
    bt3DGrid3F1U* asGridEntry = (bt3DGrid3F1U*)(&a);
    return *asGridEntry;
}
// Fatal-error handler invoked by the BT_GPU_* error-checking macros.
void btCuda_exit(int val);
// 1D texture references. Their names follow the <array>Tex pattern that the
// BT_GPU_FETCH / BT_GPU_FETCH4 macros build by token pasting (t##Tex).
texture<uint2, 1, cudaReadModeElementType> particleHashTex;
texture<uint, 1, cudaReadModeElementType> cellStartTex;
texture<float4, 1, cudaReadModeElementType> pAABBTex;
// Broadphase parameters in constant memory; written by btCuda_setParameters().
__constant__ bt3DGridBroadphaseParams params;
extern "C"
{
    // Uploads *hostParams into the __constant__ symbol `params`.
    // The call is wrapped in BT_GPU_SAFE_CALL, so a CUDA failure is reported
    // on stderr and ends in btCuda_exit().
    void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams)
    {
        // copy parameters to constant memory
        const size_t paramBytes = sizeof(bt3DGridBroadphaseParams);
        BT_GPU_SAFE_CALL(cudaMemcpyToSymbol(params, hostParams, paramBytes));
    }
} // extern "C"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h"

View File

@@ -1,69 +1,69 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef CUDA_BROADPHASE_H
#define CUDA_BROADPHASE_H
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphase.h"
///The btCudaBroadphase uses CUDA-capable GPU to compute overlapping pairs
///It derives from btGpu3DGridBroadphase and overrides the individual pipeline
///stages so that each one calls the corresponding btCuda_* routine on device buffers.
class btCudaBroadphase : public btGpu3DGridBroadphase
{
protected:
// GPU data
// All pointers below are allocated in _initialize() and released in _finalize().
// Two hash buffers of m_maxHandles * 2 unsigned ints each; both are passed to RadixSort.
unsigned int* m_dBodiesHash[2];
// m_params.m_numCells unsigned ints.
unsigned int* m_dCellStart;
// m_maxHandles * m_maxPairsPerBody unsigned ints, seeded from m_hPairBuff.
unsigned int* m_dPairBuff;
// (m_maxHandles * 2 + 1) unsigned ints, uploaded from m_hPairBuffStartCurr
// in _initialize() and again in resetPool().
unsigned int* m_dPairBuffStartCurr;
// (m_maxHandles + m_maxLargeHandles) * 2 entries, uploaded from m_hAABB in prepareAABB().
bt3DGrid3F1U* m_dAABB;
// (m_maxHandles + 1) unsigned ints, exchanged with m_hPairScan in scanOverlappingPairBuff().
unsigned int* m_dPairScan;
// m_maxHandles * m_maxPairsPerBody unsigned ints, downloaded to m_hPairOut
// in squeezeOverlappingPairBuff().
unsigned int* m_dPairOut;
public:
// Forwards all arguments to the btGpu3DGridBroadphase constructor and then calls _initialize().
btCudaBroadphase( btOverlappingPairCache* overlappingPairCache,
const btVector3& worldAabbMin,const btVector3& worldAabbMax,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxies,
int maxSmallProxiesPerCell = 8);
// Calls _finalize() to free the device buffers.
virtual ~btCudaBroadphase();
protected:
// Allocate / free the device buffers listed above.
void _initialize();
void _finalize();
// NOTE(review): no definition of the next two members is visible next to the
// other method definitions; the implementation calls the free functions
// btCuda_allocateArray / btCuda_freeArray instead - confirm whether these are still needed.
void allocateArray(void** devPtr, unsigned int size);
void freeArray(void* devPtr);
// overrides for CUDA version
virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
virtual void prepareAABB();
virtual void calcHashAABB();
virtual void sortHash();
virtual void findCellStart();
virtual void findOverlappingPairs();
virtual void findPairsLarge();
virtual void computePairCacheChanges();
virtual void scanOverlappingPairBuff();
virtual void squeezeOverlappingPairBuff();
virtual void resetPool(btDispatcher* dispatcher);
};
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef CUDA_BROADPHASE_H
#define CUDA_BROADPHASE_H
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphase.h"
///The btCudaBroadphase uses CUDA-capable GPU to compute overlapping pairs
///It derives from btGpu3DGridBroadphase and overrides the individual pipeline
///stages so that each one calls the corresponding btCuda_* routine on device buffers.
class btCudaBroadphase : public btGpu3DGridBroadphase
{
protected:
// GPU data
// All pointers below are allocated in _initialize() and released in _finalize().
// Two hash buffers of m_maxHandles * 2 unsigned ints each; both are passed to RadixSort.
unsigned int* m_dBodiesHash[2];
// m_params.m_numCells unsigned ints.
unsigned int* m_dCellStart;
// m_maxHandles * m_maxPairsPerBody unsigned ints, seeded from m_hPairBuff.
unsigned int* m_dPairBuff;
// (m_maxHandles * 2 + 1) unsigned ints, uploaded from m_hPairBuffStartCurr
// in _initialize() and again in resetPool().
unsigned int* m_dPairBuffStartCurr;
// (m_maxHandles + m_maxLargeHandles) * 2 entries, uploaded from m_hAABB in prepareAABB().
bt3DGrid3F1U* m_dAABB;
// (m_maxHandles + 1) unsigned ints, exchanged with m_hPairScan in scanOverlappingPairBuff().
unsigned int* m_dPairScan;
// m_maxHandles * m_maxPairsPerBody unsigned ints, downloaded to m_hPairOut
// in squeezeOverlappingPairBuff().
unsigned int* m_dPairOut;
public:
// Forwards all arguments to the btGpu3DGridBroadphase constructor and then calls _initialize().
btCudaBroadphase( btOverlappingPairCache* overlappingPairCache,
const btVector3& worldAabbMin,const btVector3& worldAabbMax,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxies,
int maxSmallProxiesPerCell = 8);
// Calls _finalize() to free the device buffers.
virtual ~btCudaBroadphase();
protected:
// Allocate / free the device buffers listed above.
void _initialize();
void _finalize();
// NOTE(review): no definition of the next two members is visible next to the
// other method definitions; the implementation calls the free functions
// btCuda_allocateArray / btCuda_freeArray instead - confirm whether these are still needed.
void allocateArray(void** devPtr, unsigned int size);
void freeArray(void* devPtr);
// overrides for CUDA version
virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
virtual void prepareAABB();
virtual void calcHashAABB();
virtual void sortHash();
virtual void findCellStart();
virtual void findOverlappingPairs();
virtual void findPairsLarge();
virtual void computePairCacheChanges();
virtual void scanOverlappingPairBuff();
virtual void squeezeOverlappingPairBuff();
virtual void resetPool(btDispatcher* dispatcher);
};
#endif //CUDA_BROADPHASE_H

View File

@@ -1,138 +1,138 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// Common preprocessor definitions for CUDA compiler
#ifndef BTCUDADEFINES_H
#define BTCUDADEFINES_H
// Texture fetches are switched off when compiling for device emulation.
#ifdef __DEVICE_EMULATION__
#define B_CUDA_USE_TEX 0
#else
#define B_CUDA_USE_TEX 1
#endif
// BT_GPU_FETCH / BT_GPU_FETCH4 read element i of array t: through the texture
// reference named t##Tex when B_CUDA_USE_TEX is set, by plain indexing otherwise.
// BT_GPU_FETCH additionally converts the fetched value with tex_fetch3F1U().
#if B_CUDA_USE_TEX
#define BT_GPU_FETCH(t, i) tex_fetch3F1U(tex1Dfetch(t##Tex, i))
#define BT_GPU_FETCH4(t, i) tex1Dfetch(t##Tex, i)
#else
#define BT_GPU_FETCH(t, i) t[i]
#define BT_GPU_FETCH4(t, i) t[i]
#endif
// The definitions below map the BT_GPU_* names onto their CUDA counterparts
// (presumably so the shared *SharedCode.h sources can be compiled for CUDA - confirm).
// Function / storage qualifiers.
#define BT_GPU___device__ __device__
#define BT_GPU___devdata__ __device__
#define BT_GPU___constant__ __constant__
#define BT_GPU_max(a, b) max(a, b)
#define BT_GPU_min(a, b) min(a, b)
// Name of the constant-memory parameter block.
#define BT_GPU_params params
#define BT_GPU___mul24(a, b) __mul24(a, b)
#define BT_GPU___global__ __global__
#define BT_GPU___shared__ __shared__
#define BT_GPU___syncthreads() __syncthreads()
// Vector constructors.
#define BT_GPU_make_uint2(x, y) make_uint2(x, y)
#define BT_GPU_make_int3(x, y, z) make_int3(x, y, z)
#define BT_GPU_make_float3(x, y, z) make_float3(x, y, z)
#define BT_GPU_make_float34(x) make_float3(x)
#define BT_GPU_make_float31(x) make_float3(x)
#define BT_GPU_make_float42(a, b) make_float4(a, b)
#define BT_GPU_make_float44(a, b, c, d) make_float4(a, b, c, d)
// Prefix applied to the exported function names (btCuda_<func>).
#define BT_GPU_PREF(func) btCuda_##func
// Runtime API calls.
#define BT_GPU_Memset cudaMemset
#define BT_GPU_MemcpyToSymbol(a, b, c) cudaMemcpyToSymbol(a, b, c)
// Built-in index variables.
#define BT_GPU_blockIdx blockIdx
#define BT_GPU_blockDim blockDim
#define BT_GPU_threadIdx threadIdx
// Vector math helpers.
#define BT_GPU_dot(a, b) dot(a, b)
#define BT_GPU_dot4(a, b) dot(a, b)
#define BT_GPU_cross(a, b) cross(a, b)
#define BT_GPU_BindTexture(a, b, c, d) cudaBindTexture(a, b, c, d)
#define BT_GPU_UnbindTexture(a) cudaUnbindTexture(a)
// Kernel launch with numb blocks of numt threads; args must carry its own parentheses.
#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) kfunc<<<numb, numt>>>args
//! Check for CUDA error
// Reads cudaGetLastError() and then the result of cudaThreadSynchronize().
// If either is not cudaSuccess, errorMessage is printed to stderr together
// with file, line and the CUDA error string, and btCuda_exit(EXIT_FAILURE) is called.
// errorMessage is consumed by a "%s" conversion, so it must be a C string.
#define BT_GPU_CHECK_ERROR(errorMessage) \
do \
{ \
cudaError_t err = cudaGetLastError(); \
if(err != cudaSuccess) \
{ \
fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",\
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err));\
btCuda_exit(EXIT_FAILURE); \
} \
err = cudaThreadSynchronize(); \
if(err != cudaSuccess) \
{ \
fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",\
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err));\
btCuda_exit(EXIT_FAILURE); \
} \
} \
while(0)
// Evaluates `call` (an expression yielding a CUDA runtime status) exactly once.
// If the status is not cudaSuccess, file, line and the CUDA error string are
// printed to stderr and btCuda_exit(EXIT_FAILURE) is called.
// No synchronization is performed; see BT_GPU_SAFE_CALL for the variant that does.
// The argument is parenthesized and the local uses a reserved-looking name so
// that neither operator precedence nor an `err` variable used inside `call`
// can change the meaning of the expansion.
#define BT_GPU_SAFE_CALL_NO_SYNC(call) \
do \
{ \
    cudaError_t btGpuSafeCallErr_ = (call); \
    if(btGpuSafeCallErr_ != cudaSuccess) \
    { \
        fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
                __FILE__, __LINE__, cudaGetErrorString(btGpuSafeCallErr_)); \
        btCuda_exit(EXIT_FAILURE); \
    } \
} \
while(0)
// Checked CUDA call with synchronization: runs `call` through
// BT_GPU_SAFE_CALL_NO_SYNC, then calls cudaThreadSynchronize() and checks its
// result as well. A failure of the synchronize is reported to stderr with the
// "Cuda errorSync" prefix and ends in btCuda_exit(EXIT_FAILURE).
#define BT_GPU_SAFE_CALL(call) \
do \
{ \
BT_GPU_SAFE_CALL_NO_SYNC(call); \
cudaError err = cudaThreadSynchronize(); \
if(err != cudaSuccess) \
{ \
fprintf(stderr,"Cuda errorSync in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString( err) ); \
btCuda_exit(EXIT_FAILURE); \
} \
} while (0)
extern "C" void btCuda_exit(int val);
#endif // BTCUDADEFINES_H
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// Common preprocessor definitions for CUDA compiler
#ifndef BTCUDADEFINES_H
#define BTCUDADEFINES_H
// Texture fetches are switched off when compiling for device emulation.
#ifdef __DEVICE_EMULATION__
#define B_CUDA_USE_TEX 0
#else
#define B_CUDA_USE_TEX 1
#endif
// BT_GPU_FETCH / BT_GPU_FETCH4 read element i of array t: through the texture
// reference named t##Tex when B_CUDA_USE_TEX is set, by plain indexing otherwise.
// BT_GPU_FETCH additionally converts the fetched value with tex_fetch3F1U().
#if B_CUDA_USE_TEX
#define BT_GPU_FETCH(t, i) tex_fetch3F1U(tex1Dfetch(t##Tex, i))
#define BT_GPU_FETCH4(t, i) tex1Dfetch(t##Tex, i)
#else
#define BT_GPU_FETCH(t, i) t[i]
#define BT_GPU_FETCH4(t, i) t[i]
#endif
// The definitions below map the BT_GPU_* names onto their CUDA counterparts
// (presumably so the shared *SharedCode.h sources can be compiled for CUDA - confirm).
// Function / storage qualifiers.
#define BT_GPU___device__ __device__
#define BT_GPU___devdata__ __device__
#define BT_GPU___constant__ __constant__
#define BT_GPU_max(a, b) max(a, b)
#define BT_GPU_min(a, b) min(a, b)
// Name of the constant-memory parameter block.
#define BT_GPU_params params
#define BT_GPU___mul24(a, b) __mul24(a, b)
#define BT_GPU___global__ __global__
#define BT_GPU___shared__ __shared__
#define BT_GPU___syncthreads() __syncthreads()
// Vector constructors.
#define BT_GPU_make_uint2(x, y) make_uint2(x, y)
#define BT_GPU_make_int3(x, y, z) make_int3(x, y, z)
#define BT_GPU_make_float3(x, y, z) make_float3(x, y, z)
#define BT_GPU_make_float34(x) make_float3(x)
#define BT_GPU_make_float31(x) make_float3(x)
#define BT_GPU_make_float42(a, b) make_float4(a, b)
#define BT_GPU_make_float44(a, b, c, d) make_float4(a, b, c, d)
// Prefix applied to the exported function names (btCuda_<func>).
#define BT_GPU_PREF(func) btCuda_##func
// Runtime API calls.
#define BT_GPU_Memset cudaMemset
#define BT_GPU_MemcpyToSymbol(a, b, c) cudaMemcpyToSymbol(a, b, c)
// Built-in index variables.
#define BT_GPU_blockIdx blockIdx
#define BT_GPU_blockDim blockDim
#define BT_GPU_threadIdx threadIdx
// Vector math helpers.
#define BT_GPU_dot(a, b) dot(a, b)
#define BT_GPU_dot4(a, b) dot(a, b)
#define BT_GPU_cross(a, b) cross(a, b)
#define BT_GPU_BindTexture(a, b, c, d) cudaBindTexture(a, b, c, d)
#define BT_GPU_UnbindTexture(a) cudaUnbindTexture(a)
// Kernel launch with numb blocks of numt threads; args must carry its own parentheses.
#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) kfunc<<<numb, numt>>>args
//! Check for CUDA error
// Reads cudaGetLastError() and then the result of cudaThreadSynchronize().
// If either is not cudaSuccess, errorMessage is printed to stderr together
// with file, line and the CUDA error string, and btCuda_exit(EXIT_FAILURE) is called.
// errorMessage is consumed by a "%s" conversion, so it must be a C string.
#define BT_GPU_CHECK_ERROR(errorMessage) \
do \
{ \
cudaError_t err = cudaGetLastError(); \
if(err != cudaSuccess) \
{ \
fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",\
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err));\
btCuda_exit(EXIT_FAILURE); \
} \
err = cudaThreadSynchronize(); \
if(err != cudaSuccess) \
{ \
fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",\
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err));\
btCuda_exit(EXIT_FAILURE); \
} \
} \
while(0)
// Evaluates `call` (an expression yielding a CUDA runtime status) exactly once.
// If the status is not cudaSuccess, file, line and the CUDA error string are
// printed to stderr and btCuda_exit(EXIT_FAILURE) is called.
// No synchronization is performed; see BT_GPU_SAFE_CALL for the variant that does.
// The argument is parenthesized and the local uses a reserved-looking name so
// that neither operator precedence nor an `err` variable used inside `call`
// can change the meaning of the expansion.
#define BT_GPU_SAFE_CALL_NO_SYNC(call) \
do \
{ \
    cudaError_t btGpuSafeCallErr_ = (call); \
    if(btGpuSafeCallErr_ != cudaSuccess) \
    { \
        fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
                __FILE__, __LINE__, cudaGetErrorString(btGpuSafeCallErr_)); \
        btCuda_exit(EXIT_FAILURE); \
    } \
} \
while(0)
// Checked CUDA call with synchronization: runs `call` through
// BT_GPU_SAFE_CALL_NO_SYNC, then calls cudaThreadSynchronize() and checks its
// result as well. A failure of the synchronize is reported to stderr with the
// "Cuda errorSync" prefix and ends in btCuda_exit(EXIT_FAILURE).
#define BT_GPU_SAFE_CALL(call) \
do \
{ \
BT_GPU_SAFE_CALL_NO_SYNC(call); \
cudaError err = cudaThreadSynchronize(); \
if(err != cudaSuccess) \
{ \
fprintf(stderr,"Cuda errorSync in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString( err) ); \
btCuda_exit(EXIT_FAILURE); \
} \
} while (0)
extern "C" void btCuda_exit(int val);
#endif // BTCUDADEFINES_H

View File

@@ -1,84 +1,84 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cstdlib>
#include <cstdio>
#include <string.h>
#include <GL/glut.h>
#include <cuda_gl_interop.h>
#include "cutil_math.h"
#include "math_constants.h"
#include <vector_types.h>
#include "btCudaDefines.h"
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
// Fatal-error exit path used by the BT_GPU_* checking macros: prints a prompt
// on stderr, waits for one character on stdin, then terminates with status val.
void btCuda_exit(int val)
{
    fputs("Press ENTER key to terminate the program\n", stderr);
    getchar();
    exit(val);
}
// Allocates `size` bytes with cudaMalloc and stores the pointer in *devPtr.
// Failures are handled by BT_GPU_SAFE_CALL (message on stderr, then btCuda_exit).
void btCuda_allocateArray(void** devPtr, unsigned int size)
{
    const size_t byteCount = size;
    BT_GPU_SAFE_CALL(cudaMalloc(devPtr, byteCount));
}
// Releases memory with cudaFree.
// Failures are handled by BT_GPU_SAFE_CALL (message on stderr, then btCuda_exit).
void btCuda_freeArray(void* devPtr)
{
    BT_GPU_SAFE_CALL( cudaFree(devPtr) );
}
// Copies `size` bytes from `device` to `host` (cudaMemcpyDeviceToHost).
// Failures are handled by BT_GPU_SAFE_CALL (message on stderr, then btCuda_exit).
void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size)
{
    const size_t byteCount = size;
    BT_GPU_SAFE_CALL(cudaMemcpy(host, device, byteCount, cudaMemcpyDeviceToHost));
}
// Copies `size` bytes from `host` to `device` (cudaMemcpyHostToDevice).
// Failures are handled by BT_GPU_SAFE_CALL (message on stderr, then btCuda_exit).
void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size)
{
    // The destination is passed as-is; cudaMemcpy takes a void* destination.
    const size_t byteCount = size;
    BT_GPU_SAFE_CALL(cudaMemcpy(device, host, byteCount, cudaMemcpyHostToDevice));
}
// Registers the OpenGL buffer object `vbo` with CUDA via cudaGLRegisterBufferObject.
// Failures are handled by BT_GPU_SAFE_CALL (message on stderr, then btCuda_exit).
void btCuda_registerGLBufferObject(unsigned int vbo)
{
    BT_GPU_SAFE_CALL( cudaGLRegisterBufferObject(vbo) );
}
// Maps the OpenGL buffer object `vbo` via cudaGLMapBufferObject and returns
// the pointer that call produced.
// Failures are handled by BT_GPU_SAFE_CALL (message on stderr, then btCuda_exit).
void* btCuda_mapGLBufferObject(unsigned int vbo)
{
    void* mappedPtr;
    BT_GPU_SAFE_CALL(cudaGLMapBufferObject(&mappedPtr, vbo));
    return mappedPtr;
}
// Unmaps the OpenGL buffer object `vbo` via cudaGLUnmapBufferObject.
// Failures are handled by BT_GPU_SAFE_CALL (message on stderr, then btCuda_exit).
void btCuda_unmapGLBufferObject(unsigned int vbo)
{
    BT_GPU_SAFE_CALL( cudaGLUnmapBufferObject(vbo) );
}
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedCode.h"
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cstdlib>
#include <cstdio>
#include <string.h>
#include <GL/glut.h>
#include <cuda_gl_interop.h>
#include "cutil_math.h"
#include "math_constants.h"
#include <vector_types.h>
#include "btCudaDefines.h"
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
// Prompts on stderr, blocks until a character can be read from stdin,
// then terminates the process with exit status `val`.
void btCuda_exit(int val)
{
    // Same prompt text as before, written without a format string.
    fputs("Press ENTER key to terminate the program\n", stderr);
    // The character itself is irrelevant; we only wait for input.
    (void)getchar();
    exit(val);
}
// Allocates `size` bytes with cudaMalloc and stores the resulting pointer in *devPtr.
// The call is wrapped in BT_GPU_SAFE_CALL, which is defined outside this file chunk
// (presumably it checks the returned status -- confirm in btCudaDefines.h).
void btCuda_allocateArray(void** devPtr, unsigned int size)
{
BT_GPU_SAFE_CALL(cudaMalloc(devPtr, size));
}
// Releases the allocation referenced by devPtr via cudaFree.
// The call is wrapped in BT_GPU_SAFE_CALL (defined outside this chunk;
// presumably a status check -- confirm).
void btCuda_freeArray(void* devPtr)
{
BT_GPU_SAFE_CALL(cudaFree(devPtr));
}
// Copies `size` bytes from `device` to `host` using cudaMemcpy with
// cudaMemcpyDeviceToHost. Note the argument order: destination (host) first.
void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size)
{
BT_GPU_SAFE_CALL(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost));
}
// Copies `size` bytes from `host` to `device` using cudaMemcpy with
// cudaMemcpyHostToDevice. Note the argument order: destination (device) first.
void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size)
{
// The (char*) cast does not offset the pointer; the destination is `device` itself.
BT_GPU_SAFE_CALL(cudaMemcpy((char*)device, host, size, cudaMemcpyHostToDevice));
}
// Registers the OpenGL buffer object `vbo` with CUDA via cudaGLRegisterBufferObject.
// The call is wrapped in BT_GPU_SAFE_CALL (defined outside this chunk).
void btCuda_registerGLBufferObject(unsigned int vbo)
{
BT_GPU_SAFE_CALL(cudaGLRegisterBufferObject(vbo));
}
// Maps the OpenGL buffer object `vbo` via cudaGLMapBufferObject and returns the
// pointer that call writes. Returns a null pointer if the call did not write one.
void* btCuda_mapGLBufferObject(unsigned int vbo)
{
    // Start from null: if the map call fails and BT_GPU_SAFE_CALL (defined
    // outside this chunk) does not abort, the caller must not receive an
    // indeterminate pointer value.
    void* ptr = 0;
    BT_GPU_SAFE_CALL(cudaGLMapBufferObject(&ptr, vbo));
    return ptr;
}
// Unmaps the OpenGL buffer object `vbo` via cudaGLUnmapBufferObject.
// The call is wrapped in BT_GPU_SAFE_CALL (defined outside this chunk).
void btCuda_unmapGLBufferObject(unsigned int vbo)
{
BT_GPU_SAFE_CALL(cudaGLUnmapBufferObject(vbo));
}
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedCode.h"

View File

@@ -1,42 +1,42 @@
/*
Impulse based Rigid body simulation using CUDA
Copyright (c) 2007 Takahiro Harada http://www.iii.u-tokyo.ac.jp/~takahiroharada/projects/impulseCUDA.html
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cstdlib>
#include <cstdio>
#include <string.h>
#include "cutil_math.h"
#include "math_constants.h"
#include <vector_types.h>
#include "btCudaDefines.h"
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedTypes.h"
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedDefs.h"
// File-scope 1D texture reference over float4 elements, read as stored
// (cudaReadModeElementType, i.e. no conversion of the fetched value).
// NOTE(review): declared ahead of the shared-code include that follows, which
// presumably fetches from it -- confirm in btGpuDemo2dSharedCode.h.
texture<float4, 1, cudaReadModeElementType> posTex;
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h"
/*
Impulse based Rigid body simulation using CUDA
Copyright (c) 2007 Takahiro Harada http://www.iii.u-tokyo.ac.jp/~takahiroharada/projects/impulseCUDA.html
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cstdlib>
#include <cstdio>
#include <string.h>
#include "cutil_math.h"
#include "math_constants.h"
#include <vector_types.h>
#include "btCudaDefines.h"
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedTypes.h"
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedDefs.h"
// File-scope 1D texture reference over float4 elements, read as stored
// (cudaReadModeElementType, i.e. no conversion of the fetched value).
// NOTE(review): declared ahead of the shared-code include that follows, which
// presumably fetches from it -- confirm in btGpuDemo2dSharedCode.h.
texture<float4, 1, cudaReadModeElementType> posTex;
#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h"

View File

@@ -1,46 +1,46 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cstdlib>
#include <cstdio>
#include <string.h>
#include "../../Extras/CUDA/cutil_math.h"
#include "math_constants.h"
#include <vector_types.h>
//----------------------------------------------------------------------------------------
#include "../../Extras/CUDA/btCudaDefines.h"
//----------------------------------------------------------------------------------------
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedTypes.h"
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedDefs.h"
//----------------------------------------------------------------------------------------
// File-scope 1D texture reference over float4 elements, read as stored
// (cudaReadModeElementType, i.e. no conversion of the fetched value).
// NOTE(review): declared ahead of the shared-code include that follows, which
// presumably fetches from it -- confirm in btGpuDemo3dSharedCode.h.
texture<float4, 1, cudaReadModeElementType> posTex;
//----------------------------------------------------------------------------------------
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedCode.h"
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cstdlib>
#include <cstdio>
#include <string.h>
#include "../../Extras/CUDA/cutil_math.h"
#include "math_constants.h"
#include <vector_types.h>
//----------------------------------------------------------------------------------------
#include "../../Extras/CUDA/btCudaDefines.h"
//----------------------------------------------------------------------------------------
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedTypes.h"
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedDefs.h"
//----------------------------------------------------------------------------------------
// File-scope 1D texture reference over float4 elements, read as stored
// (cudaReadModeElementType, i.e. no conversion of the fetched value).
// NOTE(review): declared ahead of the shared-code include that follows, which
// presumably fetches from it -- confirm in btGpuDemo3dSharedCode.h.
texture<float4, 1, cudaReadModeElementType> posTex;
//----------------------------------------------------------------------------------------
#include "../../Demos/Gpu3dDemo/btGpuDemo3dSharedCode.h"
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------

View File

@@ -1,86 +1,86 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
#ifndef CUTIL_GL_ERROR
#define CUTIL_GL_ERROR
/* CUda UTility Library */
// includes, system
#ifdef _WIN32
# define WINDOWS_LEAN_AND_MEAN
# include <windows.h>
# include <stdlib.h>
# undef min
# undef max
#endif
// includes, graphics
#if defined (__APPLE__) || defined(MACOSX)
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#else
#include <GL/gl.h>
#include <GL/glu.h>
#endif
////////////////////////////////////////////////////////////////////////////
//! Query the pending OpenGL error flag and report it
//! @return CUTTrue when glGetError() yields GL_NO_ERROR, otherwise CUTFalse
//! @param file source file name to print (callers pass __FILE__)
//! @param line source line number to print (callers pass __LINE__)
//! @note On error, the location and the gluErrorString() text go to stderr
//! @note Only one glGetError() call is made per invocation
//! @note Intended to be invoked through the CUT_CHECK_ERROR_GL() macro
////////////////////////////////////////////////////////////////////////////
CUTBoolean CUTIL_API
cutCheckErrorGL( const char* file, const int line)
{
    const GLenum pending = glGetError();

    // Nothing pending: report success without printing anything.
    if (GL_NO_ERROR == pending)
    {
        return CUTTrue;
    }

    // Describe where the check was made, then what GL reported.
    fprintf(stderr, "GL Error in file '%s' in line %d :\n", file, line);
    fprintf(stderr, "%s\n", gluErrorString(pending));
    return CUTFalse;
}
// CUT_CHECK_ERROR_GL(): in _DEBUG builds, calls cutCheckErrorGL() with the
// current file/line and terminates the process with EXIT_FAILURE when it
// returns CUTFalse. In all other builds it expands to nothing, so call sites
// compile in every configuration (previously the macro was undefined there).
#ifdef _DEBUG
#define CUT_CHECK_ERROR_GL() \
    if( CUTFalse == cutCheckErrorGL( __FILE__, __LINE__)) { \
        exit(EXIT_FAILURE); \
    }
#else
// Non-debug builds: no GL error check is performed.
#define CUT_CHECK_ERROR_GL()
#endif // _DEBUG
#endif // CUTIL_GL_ERROR
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
#ifndef CUTIL_GL_ERROR
#define CUTIL_GL_ERROR
/* CUda UTility Library */
// includes, system
#ifdef _WIN32
# define WINDOWS_LEAN_AND_MEAN
# include <windows.h>
# include <stdlib.h>
# undef min
# undef max
#endif
// includes, graphics
#if defined (__APPLE__) || defined(MACOSX)
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#else
#include <GL/gl.h>
#include <GL/glu.h>
#endif
////////////////////////////////////////////////////////////////////////////
//! Query the pending OpenGL error flag and report it
//! @return CUTTrue when glGetError() yields GL_NO_ERROR, otherwise CUTFalse
//! @param file source file name to print (callers pass __FILE__)
//! @param line source line number to print (callers pass __LINE__)
//! @note On error, the location and the gluErrorString() text go to stderr
//! @note Only one glGetError() call is made per invocation
//! @note Intended to be invoked through the CUT_CHECK_ERROR_GL() macro
////////////////////////////////////////////////////////////////////////////
CUTBoolean CUTIL_API
cutCheckErrorGL( const char* file, const int line)
{
    const GLenum pending = glGetError();

    // Nothing pending: report success without printing anything.
    if (GL_NO_ERROR == pending)
    {
        return CUTTrue;
    }

    // Describe where the check was made, then what GL reported.
    fprintf(stderr, "GL Error in file '%s' in line %d :\n", file, line);
    fprintf(stderr, "%s\n", gluErrorString(pending));
    return CUTFalse;
}
// CUT_CHECK_ERROR_GL(): in _DEBUG builds, calls cutCheckErrorGL() with the
// current file/line and terminates the process with EXIT_FAILURE when it
// returns CUTFalse. In all other builds it expands to nothing, so call sites
// compile in every configuration (previously the macro was undefined there).
#ifdef _DEBUG
#define CUT_CHECK_ERROR_GL() \
    if( CUTFalse == cutCheckErrorGL( __FILE__, __LINE__)) { \
        exit(EXIT_FAILURE); \
    }
#else
// Non-debug builds: no GL error check is performed.
#define CUT_CHECK_ERROR_GL()
#endif // _DEBUG
#endif // CUTIL_GL_ERROR

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,79 +1,79 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/* Radixsort project with key/value and arbitrary dataset size support
* which demonstrates the use of CUDA in a multi phase sorting
* computation.
* Host code.
*/
#include "radixsort.cuh"
#include "radixsort_kernel.cu"
extern "C"
{
////////////////////////////////////////////////////////////////////////////////
//! Perform a radix sort
//! The sorted result is always delivered in the caller's pData0 array.
//!
//! @param pData0 input and output array - data will be sorted
//! @param pData1 additional array to allow ping pong computation; its
//!               contents are overwritten
//! @param elements number of elements to sort
//! @param bits number of key bits to sort on; one pass runs for every RADIX
//!             bits (shift = 0, RADIX, 2*RADIX, ... while shift < bits)
//! @note Both arrays are passed straight to the kernels, so they are expected
//!       to be device pointers (NOTE(review): confirm against callers).
//! @note Kernel launch status is not inspected here; the function has no
//!       error channel (it returns void).
////////////////////////////////////////////////////////////////////////////////
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits)
{
    // Remember the caller's primary buffer. The local pointers are exchanged
    // after every pass, so after an odd number of passes the newest data lives
    // in the caller's pData1 rather than in pData0.
    KeyValuePair* const pCallerData0 = pData0;

    // Round element count up to the next multiple of 3072
    // (presumably NUM_BLOCKS * NUM_THREADS_PER_BLOCK, the total thread count
    // -- confirm in radixsort_kernel.cu).
    uint elements_rounded_to_3072;
    int modval = elements % 3072;
    if( modval == 0 )
        elements_rounded_to_3072 = elements;
    else
        elements_rounded_to_3072 = elements + (3072 - (modval));

    // Iterate over the key RADIX bits at a time, using each digit to sort the list in turn
    for (uint shift = 0; shift < bits; shift += RADIX)
    {
        // Perform one round of radix sorting

        // Generate per radix group sums radix counts across a radix group
        RadixSum<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, GRFSIZE>>>(pData0, elements, elements_rounded_to_3072, shift);

        // Prefix sum in radix groups, and then between groups throughout a block
        RadixPrefixSum<<<PREFIX_NUM_BLOCKS, PREFIX_NUM_THREADS_PER_BLOCK, PREFIX_GRFSIZE>>>();

        // Sum the block offsets and then shuffle data into bins
        RadixAddOffsetsAndShuffle<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, SHUFFLE_GRFSIZE>>>(pData0, pData1, elements, elements_rounded_to_3072, shift);

        // Exchange data pointers so the next pass reads what this pass wrote
        KeyValuePair* pTemp = pData0;
        pData0 = pData1;
        pData1 = pTemp;
    }

    // After the final exchange, pData0 names the buffer holding the latest
    // output. If that is not the caller's pData0 (odd pass count), copy the
    // result over so the documented contract holds. The copy is issued on the
    // default stream like the kernels above, so it is ordered after them.
    if (pData0 != pCallerData0)
    {
        (void)cudaMemcpy(pCallerData0, pData0,
                         (size_t)elements * sizeof(KeyValuePair),
                         cudaMemcpyDeviceToDevice);
    }
}
}
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/* Radixsort project with key/value and arbitrary dataset size support
* which demonstrates the use of CUDA in a multi phase sorting
* computation.
* Host code.
*/
#include "radixsort.cuh"
#include "radixsort_kernel.cu"
extern "C"
{
////////////////////////////////////////////////////////////////////////////////
//! Perform a radix sort
//! The sorted result is always delivered in the caller's pData0 array.
//!
//! @param pData0 input and output array - data will be sorted
//! @param pData1 additional array to allow ping pong computation; its
//!               contents are overwritten
//! @param elements number of elements to sort
//! @param bits number of key bits to sort on; one pass runs for every RADIX
//!             bits (shift = 0, RADIX, 2*RADIX, ... while shift < bits)
//! @note Both arrays are passed straight to the kernels, so they are expected
//!       to be device pointers (NOTE(review): confirm against callers).
//! @note Kernel launch status is not inspected here; the function has no
//!       error channel (it returns void).
////////////////////////////////////////////////////////////////////////////////
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits)
{
    // Remember the caller's primary buffer. The local pointers are exchanged
    // after every pass, so after an odd number of passes the newest data lives
    // in the caller's pData1 rather than in pData0.
    KeyValuePair* const pCallerData0 = pData0;

    // Round element count up to the next multiple of 3072
    // (presumably NUM_BLOCKS * NUM_THREADS_PER_BLOCK, the total thread count
    // -- confirm in radixsort_kernel.cu).
    uint elements_rounded_to_3072;
    int modval = elements % 3072;
    if( modval == 0 )
        elements_rounded_to_3072 = elements;
    else
        elements_rounded_to_3072 = elements + (3072 - (modval));

    // Iterate over the key RADIX bits at a time, using each digit to sort the list in turn
    for (uint shift = 0; shift < bits; shift += RADIX)
    {
        // Perform one round of radix sorting

        // Generate per radix group sums radix counts across a radix group
        RadixSum<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, GRFSIZE>>>(pData0, elements, elements_rounded_to_3072, shift);

        // Prefix sum in radix groups, and then between groups throughout a block
        RadixPrefixSum<<<PREFIX_NUM_BLOCKS, PREFIX_NUM_THREADS_PER_BLOCK, PREFIX_GRFSIZE>>>();

        // Sum the block offsets and then shuffle data into bins
        RadixAddOffsetsAndShuffle<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, SHUFFLE_GRFSIZE>>>(pData0, pData1, elements, elements_rounded_to_3072, shift);

        // Exchange data pointers so the next pass reads what this pass wrote
        KeyValuePair* pTemp = pData0;
        pData0 = pData1;
        pData1 = pTemp;
    }

    // After the final exchange, pData0 names the buffer holding the latest
    // output. If that is not the caller's pData0 (odd pass count), copy the
    // result over so the documented contract holds. The copy is issued on the
    // default stream like the kernels above, so it is ordered after them.
    if (pData0 != pCallerData0)
    {
        (void)cudaMemcpy(pCallerData0, pData0,
                         (size_t)elements * sizeof(KeyValuePair),
                         cudaMemcpyDeviceToDevice);
    }
}
}

View File

@@ -1,63 +1,63 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/* Radixsort project which demonstrates the use of CUDA in a multi phase
* sorting computation.
* Type definitions.
*/
#ifndef _RADIXSORT_H_
#define _RADIXSORT_H_
#include <host_defines.h>
#define SYNCIT __syncthreads()
// Set SIXTEEN to 1 to use 16 bit (ushort) keys/values; 0 selects uint keys/values
#define SIXTEEN 0
typedef unsigned int uint;
typedef unsigned short ushort;
// Key/value record handled by RadixSort. The field width and the requested
// alignment are selected at compile time by SIXTEEN:
//   SIXTEEN != 0 : two ushort fields, __align__(4)
//   SIXTEEN == 0 : two uint fields,   __align__(8)
#if SIXTEEN
typedef struct __align__(4) {
    ushort key;
    ushort value;
} KeyValuePair;
#else
typedef struct __align__(8) {
    uint key;
    uint value;
} KeyValuePair;
#endif
// C-linkage declaration of the sort entry point; the definition is not in this header.
// pData0/pData1 are the two KeyValuePair arrays, `elements` the element count,
// `bits` the number of key bits (parameter meanings taken from the names --
// see the definition for the exact contract).
extern "C" {
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits);
}
#endif // #ifndef _RADIXSORT_H_
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/* Radixsort project which demonstrates the use of CUDA in a multi phase
* sorting computation.
* Type definitions.
*/
#ifndef _RADIXSORT_H_
#define _RADIXSORT_H_
#include <host_defines.h>
#define SYNCIT __syncthreads()
// Set SIXTEEN to 1 to use 16 bit (ushort) keys/values; 0 selects uint keys/values
#define SIXTEEN 0
typedef unsigned int uint;
typedef unsigned short ushort;
// Key/value record handled by RadixSort. The field width and the requested
// alignment are selected at compile time by SIXTEEN:
//   SIXTEEN != 0 : two ushort fields, __align__(4)
//   SIXTEEN == 0 : two uint fields,   __align__(8)
#if SIXTEEN
typedef struct __align__(4) {
    ushort key;
    ushort value;
} KeyValuePair;
#else
typedef struct __align__(8) {
    uint key;
    uint value;
} KeyValuePair;
#endif
// C-linkage declaration of the sort entry point; the definition is not in this header.
// pData0/pData1 are the two KeyValuePair arrays, `elements` the element count,
// `bits` the number of key bits (parameter meanings taken from the names --
// see the definition for the exact contract).
extern "C" {
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits);
}
#endif // #ifndef _RADIXSORT_H_

File diff suppressed because it is too large Load Diff