// bullet3/Extras/CUDA/particleSystem.cu
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
//#include <cutil.h>
#include <cstdlib>
#include <cstdio>
#include <string.h>
#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif
#include <cuda_gl_interop.h>
#include "particles_kernel.cu"
#include "radixsort.cu"

//! Check for a CUDA error from the last kernel launch, then synchronize and check again
#define CUT_CHECK_ERROR(errorMessage) do {                                     \
        cudaError_t err = cudaGetLastError();                                  \
        if (cudaSuccess != err) {                                              \
            fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n",  \
                    errorMessage, __FILE__, __LINE__, cudaGetErrorString(err)); \
            mm_exit(EXIT_FAILURE);                                             \
        }                                                                      \
        err = cudaThreadSynchronize();                                         \
        if (cudaSuccess != err) {                                              \
            fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n",  \
                    errorMessage, __FILE__, __LINE__, cudaGetErrorString(err)); \
            mm_exit(EXIT_FAILURE);                                             \
        }                                                                      \
    } while (0)
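
//! Check the result of a CUDA runtime call without forcing a device synchronization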
#define MY_CUDA_SAFE_CALL_NO_SYNC(call) do {                                   \
        cudaError_t err = call;                                                \
        if (cudaSuccess != err) {                                              \
            fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",      \
                    __FILE__, __LINE__, cudaGetErrorString(err));              \
            mm_exit(EXIT_FAILURE);                                             \
        }                                                                      \
    } while (0)
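
//! Check the result of a CUDA runtime call, then synchronize to catch deferred errors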
#define MY_CUDA_SAFE_CALL(call) do {                                           \
        MY_CUDA_SAFE_CALL_NO_SYNC(call);                                       \
        cudaError_t err = cudaThreadSynchronize();                             \
        if (cudaSuccess != err) {                                              \
            fprintf(stderr, "Cuda errorSync in file '%s' in line %i : %s.\n",  \
                    __FILE__, __LINE__, cudaGetErrorString(err));              \
            mm_exit(EXIT_FAILURE);                                             \
        }                                                                      \
    } while (0)
extern "C"
{
void mm_exit(int val)
{
exit(val);
}
void cudaInit(int argc, char **argv)
{
//CUT_DEVICE_INIT(argc, argv);
}
void allocateArray(void **devPtr, size_t size)
{
MY_CUDA_SAFE_CALL(cudaMalloc(devPtr, size));
}
void freeArray(void *devPtr)
{
MY_CUDA_SAFE_CALL(cudaFree(devPtr));
}
void threadSync()
{
MY_CUDA_SAFE_CALL(cudaThreadSynchronize());
}

void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int size)
{
    // if a VBO id is given, map it and copy from the mapped pointer instead
    if (vbo)
        MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&device, vbo));

    MY_CUDA_SAFE_CALL(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost));

    if (vbo)
        MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vbo));
}

void copyArrayToDevice(void* device, const void* host, int offset, int size)
{
    MY_CUDA_SAFE_CALL(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice));
/*
    // earlier hand-rolled error reporting, kept for reference
    cudaError_t err = cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice);
    switch(err)
    {
    case cudaSuccess :
        return;
    case cudaErrorInvalidValue :
        printf("\ncudaErrorInvalidValue : %d\n", err);
        return;
    case cudaErrorInvalidDevicePointer :
        printf("\ncudaErrorInvalidDevicePointer : %d\n", err);
        return;
    case cudaErrorInvalidMemcpyDirection :
        printf("\ncudaErrorInvalidMemcpyDirection : %d\n", err);
        return;
    default :
        printf("\nX3 : %d\n", err);
        return;
    }
*/
}

void registerGLBufferObject(uint vbo)
{
    MY_CUDA_SAFE_CALL(cudaGLRegisterBufferObject(vbo));
}

void unregisterGLBufferObject(uint vbo)
{
    MY_CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(vbo));
}

void setParameters(SimParams *hostParams)
{
    // copy parameters to constant memory
    MY_CUDA_SAFE_CALL(cudaMemcpyToSymbol(params, hostParams, sizeof(SimParams)));
}

// Integer division rounded up: computes ceil(a / b)
int iDivUp(int a, int b)
{
    return (a % b != 0) ? (a / b + 1) : (a / b);
}

// compute grid and thread block size for a given number of elements,
// e.g. n = 1000, blockSize = 256 gives numThreads = 256 and numBlocks = 4
void computeGridSize(int n, int blockSize, int &numBlocks, int &numThreads)
{
    numThreads = min(blockSize, n);
    numBlocks = iDivUp(n, numThreads);
}
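
// Advance the particle system one timestep. Positions live in OpenGL VBOs,
// which are mapped into CUDA address space around the kernel launch;
// velocities are plain device arrays.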
void
integrateSystem(uint vboOldPos, uint vboNewPos,
                float* oldVel, float* newVel,
                float deltaTime,
                int numBodies)
{
    int numThreads, numBlocks;
    computeGridSize(numBodies, 256, numBlocks, numThreads);

    float *oldPos, *newPos;
    MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos));
    MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&newPos, vboNewPos));

    // execute the kernel
    integrate<<< numBlocks, numThreads >>>((float4*)newPos, (float4*)newVel,
                                           (float4*)oldPos, (float4*)oldVel,
                                           deltaTime);

    // check if kernel invocation generated an error
    CUT_CHECK_ERROR("integrate kernel execution failed");

    MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
    MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboNewPos));
}
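
// Rebuild the uniform grid: gridCounters is cleared, then updateGridD
// (defined in particles_kernel.cu) bins each particle into gridCells,
// one thread per particle.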
void
updateGrid(uint vboPos,
           uint* gridCounters,
           uint* gridCells,
           uint numBodies,
           uint numCells)
{
    int numThreads, numBlocks;
    computeGridSize(numBodies, 256, numBlocks, numThreads);

    float *pos;
    MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&pos, vboPos));
    MY_CUDA_SAFE_CALL(cudaMemset(gridCounters, 0, numCells*sizeof(uint)));

    // execute the kernel
    updateGridD<<< numBlocks, numThreads >>>((float4 *) pos,
                                             gridCounters,
                                             gridCells);

    // check if kernel invocation generated an error
    CUT_CHECK_ERROR("Kernel execution failed");

    MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos));
}
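
// Compute a grid hash for each particle from its position; calcHashD fills
// particleHash, which the rest of the pipeline consumes as uint2 pairs
// (the input to the radix sort).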
void
calcHash(uint vboPos,
         uint* particleHash,
         int numBodies)
{
    int numThreads, numBlocks;
    computeGridSize(numBodies, 256, numBlocks, numThreads);

    float *pos;
    MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&pos, vboPos));

    // execute the kernel
    calcHashD<<< numBlocks, numThreads >>>((float4 *) pos,
                                           (uint2 *) particleHash);

    // check if kernel invocation generated an error
    CUT_CHECK_ERROR("Kernel execution failed");

    MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos));
}
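
// Reorder position/velocity data to follow the sorted particleHash array and
// record where each cell's particles begin. cellStart is preset to 0xffffffff,
// presumably so the kernel can tell untouched (empty) cells apart.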
void
reorderDataAndFindCellStart(uint* particleHash,
                            uint vboOldPos,
                            float* oldVel,
                            float* sortedPos,
                            float* sortedVel,
                            uint* cellStart,
                            uint numBodies,
                            uint numCells)
{
    int numThreads, numBlocks;
    computeGridSize(numBodies, 256, numBlocks, numThreads);

    MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint)));

    float *oldPos;
    MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos));

#if USE_TEX
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, oldPos, numBodies*sizeof(float4)));
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, oldVel, numBodies*sizeof(float4)));
#endif

    reorderDataAndFindCellStartD<<< numBlocks, numThreads >>>(
        (uint2 *)  particleHash,
        (float4 *) oldPos,
        (float4 *) oldVel,
        (float4 *) sortedPos,
        (float4 *) sortedVel,
        (uint *)   cellStart);
    CUT_CHECK_ERROR("Kernel execution failed: reorderDataAndFindCellStartD");

#if USE_TEX
    MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldPosTex));
    MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldVelTex));
#endif

    MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
}
#if 1
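
// Standalone variant that only fills in cellStart from the sorted hash array,
// without reordering the particle data.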
void
findCellStart(uint* particleHash,
              uint* cellStart,
              uint numBodies,
              uint numCells)
{
    int numThreads, numBlocks;
    computeGridSize(numBodies, 256, numBlocks, numThreads);

    MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint)));

    findCellStartD<<< numBlocks, numThreads >>>(
        (uint2 *) particleHash,
        (uint *)  cellStart);
    CUT_CHECK_ERROR("Kernel execution failed: findCellStartD");
}
#endif
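
// Resolve particle-particle collisions, one thread per particle. Depending on
// USE_SORT the kernel reads either the sorted arrays plus cellStart, or the
// gridCounters/gridCells layout; with USE_TEX the inputs are additionally
// bound to textures for cached reads.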
void
collide(uint vboOldPos, uint vboNewPos,
        float* sortedPos, float* sortedVel,
        float* oldVel, float* newVel,
        uint* gridCounters,
        uint* gridCells,
        uint* particleHash,
        uint* cellStart,
        uint numBodies,
        uint numCells,
        uint maxParticlesPerCell)
{
    float4 *oldPos, *newPos;
    MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos));
    MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&newPos, vboNewPos));

#if USE_TEX
#if USE_SORT
    // use sorted arrays
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, sortedPos, numBodies*sizeof(float4)));
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, sortedVel, numBodies*sizeof(float4)));
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, particleHashTex, particleHash, numBodies*sizeof(uint2)));
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, cellStartTex, cellStart, numCells*sizeof(uint)));
#else
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, oldPos, numBodies*sizeof(float4)));
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, oldVel, numBodies*sizeof(float4)));
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, gridCountersTex, gridCounters, numCells*sizeof(uint)));
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, gridCellsTex, gridCells, numCells*maxParticlesPerCell*sizeof(uint)));
#endif
#endif

    // thread per particle
    int numThreads, numBlocks;
    computeGridSize(numBodies, BLOCKDIM, numBlocks, numThreads);

    // execute the kernel
    collideD<<< numBlocks, numThreads >>>((float4*)newPos, (float4*)newVel,
#if USE_SORT
                                          (float4*)sortedPos, (float4*)sortedVel,
                                          (uint2 *) particleHash,
                                          cellStart
#else
                                          (float4*)oldPos, (float4*)oldVel,
                                          gridCounters,
                                          gridCells
#endif
                                          );

    // check if kernel invocation generated an error
    CUT_CHECK_ERROR("Kernel execution failed");

    MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboNewPos));
    MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));

#if USE_TEX
    MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldPosTex));
    MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldVelTex));
#if USE_SORT
    MY_CUDA_SAFE_CALL(cudaUnbindTexture(particleHashTex));
    MY_CUDA_SAFE_CALL(cudaUnbindTexture(cellStartTex));
#else
    MY_CUDA_SAFE_CALL(cudaUnbindTexture(gridCountersTex));
    MY_CUDA_SAFE_CALL(cudaUnbindTexture(gridCellsTex));
#endif
#endif
}
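
// Broadphase step for Bullet: find overlapping AABB pairs using the spatial
// hash. Each particle's AABB (two float4s) is bound to a texture for the
// kernel launch.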
void
btCudaFindOverlappingPairs( float* pAABB,
                            uint* pParticleHash,
                            uint* pCellStart,
                            uint* pPairBuff,
                            uint* pPairBuffStartCurr,
                            uint numParticles)
{
//  cudaError err = cudaMemset(pPairBuff, 0x00, numParticles*32*4);
//  if(err != cudaSuccess)
//  {
//      printf("\nAAAAA\n");
//  }
    MY_CUDA_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numParticles*2*sizeof(float4)));

    int numThreads, numBlocks;
//  computeGridSize(numParticles, 256, numBlocks, numThreads);
    computeGridSize(numParticles, 64, numBlocks, numThreads);
//  numThreads = 1;
//  numBlocks = 1;

    btCudaFindOverlappingPairsD<<< numBlocks, numThreads >>>(
        (float4 *)pAABB,
        (uint2*)pParticleHash,
        (uint*)pCellStart,
        (uint*)pPairBuff,
        (uint2*)pPairBuffStartCurr,
        numParticles
    );
    CUT_CHECK_ERROR("Kernel execution failed: btCudaFindOverlappingPairsD");

    MY_CUDA_SAFE_CALL(cudaUnbindTexture(pAABBTex));
} // btCudaFindOverlappingPairs()
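
// Count per-particle changes in the pair cache and write the counts into
// pPairScan, which btCudaSqueezeOverlappingPairBuff consumes below.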
void
btCudaComputePairCacheChanges(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles)
{
    int numThreads, numBlocks;
    computeGridSize(numParticles, 256, numBlocks, numThreads);

    btCudaComputePairCacheChangesD<<< numBlocks, numThreads >>>(
        (uint*)pPairBuff,
        (uint2*)pPairBuffStartCurr,
        (uint*)pPairScan
    );
    CUT_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
} // btCudaComputePairCacheChanges()
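
// Compact the per-particle pair buffer into the dense pPairOut array,
// presumably using the prefix-summed counts in pPairScan as offsets.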
void
btCudaSqueezeOverlappingPairBuff(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles)
{
    int numThreads, numBlocks;
    computeGridSize(numParticles, 256, numBlocks, numThreads);

    btCudaSqueezeOverlappingPairBuffD<<< numBlocks, numThreads >>>(
        (uint*)pPairBuff,
        (uint2*)pPairBuffStartCurr,
        (uint*)pPairScan,
        pPairOut
    );
    CUT_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
}

} // extern "C"