more CUDA work
This commit is contained in:
@@ -49,13 +49,13 @@
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
|
||||
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
|
||||
exit(EXIT_FAILURE); \
|
||||
mm_exit(EXIT_FAILURE); \
|
||||
} \
|
||||
err = cudaThreadSynchronize(); \
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
|
||||
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
|
||||
exit(EXIT_FAILURE); \
|
||||
mm_exit(EXIT_FAILURE); \
|
||||
} } while (0)
|
||||
|
||||
|
||||
@@ -64,22 +64,27 @@
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
|
||||
__FILE__, __LINE__, cudaGetErrorString( err) ); \
|
||||
exit(EXIT_FAILURE); \
|
||||
mm_exit(EXIT_FAILURE); \
|
||||
} } while (0)
|
||||
|
||||
# define MY_CUDA_SAFE_CALL( call) do { \
|
||||
MY_CUDA_SAFE_CALL_NO_SYNC(call); \
|
||||
cudaError err = cudaThreadSynchronize(); \
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
|
||||
fprintf(stderr, "Cuda errorSync in file '%s' in line %i : %s.\n", \
|
||||
__FILE__, __LINE__, cudaGetErrorString( err) ); \
|
||||
exit(EXIT_FAILURE); \
|
||||
mm_exit(EXIT_FAILURE); \
|
||||
} } while (0)
|
||||
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
void mm_exit(int val)
|
||||
{
|
||||
exit(val);
|
||||
}
|
||||
|
||||
void cudaInit(int argc, char **argv)
|
||||
{
|
||||
//CUT_DEVICE_INIT(argc, argv);
|
||||
@@ -112,6 +117,26 @@ void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int s
|
||||
void copyArrayToDevice(void* device, const void* host, int offset, int size)
|
||||
{
|
||||
MY_CUDA_SAFE_CALL(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice));
|
||||
/*
|
||||
cudaError_t err = cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice);
|
||||
switch(err)
|
||||
{
|
||||
case cudaSuccess :
|
||||
return;
|
||||
case cudaErrorInvalidValue :
|
||||
printf("\ncudaErrorInvalidValue : %d\n", err);
|
||||
return;
|
||||
case cudaErrorInvalidDevicePointer :
|
||||
printf("\ncudaErrorInvalidDevicePointer : %d\n", err);
|
||||
return;
|
||||
case cudaErrorInvalidMemcpyDirection :
|
||||
printf("\ncudaErrorInvalidMemcpyDirection : %d\n", err);
|
||||
return;
|
||||
default :
|
||||
printf("\nX3 : %d\n", err);
|
||||
return;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
void registerGLBufferObject(uint vbo)
|
||||
@@ -255,6 +280,27 @@ reorderDataAndFindCellStart(uint* particleHash,
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
|
||||
}
|
||||
|
||||
#if 1
|
||||
void
|
||||
findCellStart( uint* particleHash,
|
||||
uint* cellStart,
|
||||
uint numBodies,
|
||||
uint numCells)
|
||||
{
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numBodies, 256, numBlocks, numThreads);
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint)));
|
||||
|
||||
findCellStartD<<< numBlocks, numThreads >>>(
|
||||
(uint2 *) particleHash,
|
||||
(uint *) cellStart);
|
||||
CUT_CHECK_ERROR("Kernel execution failed: findCellStartD");
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void
|
||||
collide(uint vboOldPos, uint vboNewPos,
|
||||
float* sortedPos, float* sortedVel,
|
||||
@@ -328,4 +374,67 @@ collide(uint vboOldPos, uint vboNewPos,
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
btCudaFindOverlappingPairs( float* pAABB,
|
||||
uint* pParticleHash,
|
||||
uint* pCellStart,
|
||||
uint* pPairBuff,
|
||||
uint* pPairBuffStartCurr,
|
||||
uint numParticles)
|
||||
{
|
||||
|
||||
// cudaError err = cudaMemset(pPairBuff, 0x00, numParticles*32*4);
|
||||
// if(err != cudaSuccess)
|
||||
// {
|
||||
// printf("\nAAAAA\n");
|
||||
// }
|
||||
|
||||
int numThreads, numBlocks;
|
||||
// computeGridSize(numParticles, 256, numBlocks, numThreads);
|
||||
computeGridSize(numParticles, 64, numBlocks, numThreads);
|
||||
// numThreads = 1;
|
||||
// numBlocks = 1;
|
||||
btCudaFindOverlappingPairsD<<< numBlocks, numThreads >>>(
|
||||
(float4 *)pAABB,
|
||||
(uint2*)pParticleHash,
|
||||
(uint*)pCellStart,
|
||||
(uint*)pPairBuff,
|
||||
(uint2*)pPairBuffStartCurr,
|
||||
numParticles
|
||||
);
|
||||
CUT_CHECK_ERROR("Kernel execution failed: btCudaFindOverlappingPairsD");
|
||||
} // btCudaFindOverlappingPairs()
|
||||
|
||||
|
||||
void
|
||||
btCudaComputePairCacheChanges(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles)
|
||||
{
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numParticles, 256, numBlocks, numThreads);
|
||||
|
||||
btCudaComputePairCacheChangesD<<< numBlocks, numThreads >>>(
|
||||
(uint*)pPairBuff,
|
||||
(uint2*)pPairBuffStartCurr,
|
||||
(uint*)pPairScan
|
||||
);
|
||||
CUT_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
|
||||
} // btCudaFindOverlappingPairs()
|
||||
|
||||
|
||||
void
|
||||
btCudaSqueezeOverlappingPairBuff(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles)
|
||||
{
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numParticles, 256, numBlocks, numThreads);
|
||||
|
||||
btCudaSqueezeOverlappingPairBuffD<<< numBlocks, numThreads >>>(
|
||||
(uint*)pPairBuff,
|
||||
(uint2*)pPairBuffStartCurr,
|
||||
(uint*)pPairScan,
|
||||
pPairOut
|
||||
);
|
||||
CUT_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
|
||||
}
|
||||
|
||||
|
||||
} // extern "C"
|
||||
|
||||
Reference in New Issue
Block a user