more CUDA work

This commit is contained in:
rponom
2008-10-28 23:25:59 +00:00
parent f5e16847df
commit 309a12836d
12 changed files with 1325 additions and 357 deletions

View File

@@ -49,13 +49,13 @@
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
exit(EXIT_FAILURE); \
mm_exit(EXIT_FAILURE); \
} \
err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
exit(EXIT_FAILURE); \
mm_exit(EXIT_FAILURE); \
} } while (0)
@@ -64,22 +64,27 @@
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \
exit(EXIT_FAILURE); \
mm_exit(EXIT_FAILURE); \
} } while (0)
# define MY_CUDA_SAFE_CALL( call) do { \
MY_CUDA_SAFE_CALL_NO_SYNC(call); \
cudaError err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
fprintf(stderr, "Cuda errorSync in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \
exit(EXIT_FAILURE); \
mm_exit(EXIT_FAILURE); \
} } while (0)
extern "C"
{
void mm_exit(int val)
{
exit(val);
}
void cudaInit(int argc, char **argv)
{
//CUT_DEVICE_INIT(argc, argv);
@@ -112,6 +117,26 @@ void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int s
void copyArrayToDevice(void* device, const void* host, int offset, int size)
{
MY_CUDA_SAFE_CALL(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice));
/*
cudaError_t err = cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice);
switch(err)
{
case cudaSuccess :
return;
case cudaErrorInvalidValue :
printf("\ncudaErrorInvalidValue : %d\n", err);
return;
case cudaErrorInvalidDevicePointer :
printf("\ncudaErrorInvalidDevicePointer : %d\n", err);
return;
case cudaErrorInvalidMemcpyDirection :
printf("\ncudaErrorInvalidMemcpyDirection : %d\n", err);
return;
default :
printf("\nX3 : %d\n", err);
return;
}
*/
}
void registerGLBufferObject(uint vbo)
@@ -255,6 +280,27 @@ reorderDataAndFindCellStart(uint* particleHash,
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
}
#if 1
void
findCellStart( uint* particleHash,
uint* cellStart,
uint numBodies,
uint numCells)
{
int numThreads, numBlocks;
computeGridSize(numBodies, 256, numBlocks, numThreads);
MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint)));
findCellStartD<<< numBlocks, numThreads >>>(
(uint2 *) particleHash,
(uint *) cellStart);
CUT_CHECK_ERROR("Kernel execution failed: findCellStartD");
}
#endif
void
collide(uint vboOldPos, uint vboNewPos,
float* sortedPos, float* sortedVel,
@@ -328,4 +374,67 @@ collide(uint vboOldPos, uint vboNewPos,
#endif
}
void
btCudaFindOverlappingPairs( float* pAABB,
uint* pParticleHash,
uint* pCellStart,
uint* pPairBuff,
uint* pPairBuffStartCurr,
uint numParticles)
{
// cudaError err = cudaMemset(pPairBuff, 0x00, numParticles*32*4);
// if(err != cudaSuccess)
// {
// printf("\nAAAAA\n");
// }
int numThreads, numBlocks;
// computeGridSize(numParticles, 256, numBlocks, numThreads);
computeGridSize(numParticles, 64, numBlocks, numThreads);
// numThreads = 1;
// numBlocks = 1;
btCudaFindOverlappingPairsD<<< numBlocks, numThreads >>>(
(float4 *)pAABB,
(uint2*)pParticleHash,
(uint*)pCellStart,
(uint*)pPairBuff,
(uint2*)pPairBuffStartCurr,
numParticles
);
CUT_CHECK_ERROR("Kernel execution failed: btCudaFindOverlappingPairsD");
} // btCudaFindOverlappingPairs()
void
btCudaComputePairCacheChanges(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles)
{
int numThreads, numBlocks;
computeGridSize(numParticles, 256, numBlocks, numThreads);
btCudaComputePairCacheChangesD<<< numBlocks, numThreads >>>(
(uint*)pPairBuff,
(uint2*)pPairBuffStartCurr,
(uint*)pPairScan
);
CUT_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
} // btCudaFindOverlappingPairs()
void
btCudaSqueezeOverlappingPairBuff(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles)
{
int numThreads, numBlocks;
computeGridSize(numParticles, 256, numBlocks, numThreads);
btCudaSqueezeOverlappingPairBuffD<<< numBlocks, numThreads >>>(
(uint*)pPairBuff,
(uint2*)pPairBuffStartCurr,
(uint*)pPairScan,
pPairOut
);
CUT_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
}
} // extern "C"