more CUDA work
This commit is contained in:
@@ -199,6 +199,35 @@ reorderDataAndFindCellStartD(uint2* particleHash, // particle id sorted by has
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Record, for every grid cell that holds at least one particle, the position
// in the hash-sorted array at which that cell's run of entries begins.
//
//   particleHash : entries sorted by hash; .x is the hash value compared here
//   cellStart    : output, indexed by hash; receives the sorted index of the
//                  first entry carrying that hash. Slots of hashes that never
//                  occur are not written by this kernel.
//
// Launch layout: 1D grid of 1D blocks, one thread per sorted entry. The
// static shared buffer holds blockDim.x + 1 hashes, so blockDim.x must not
// exceed 256. There is no tail guard: every launched thread reads
// particleHash[index].
__global__ void
findCellStartD(uint2* particleHash,  // particle id sorted by hash
               uint*  cellStart)
{
    const int tid   = threadIdx.x;
    const int index = __mul24(blockIdx.x, blockDim.x) + tid;

    const uint2 sortedData = particleHash[index];
    const uint  myHash     = sortedData.x;

    // Stage the block's hashes in shared memory shifted up by one slot, so
    // that slot [tid] holds the hash of the entry preceding this thread's
    // entry and no thread has to fetch two hashes from global memory.
    __shared__ uint sharedHash[257];
    sharedHash[tid + 1] = myHash;

    if (tid == 0 && index > 0)
    {
        // The first thread of the block supplies the hash of the entry that
        // belongs to the previous block.
        volatile uint2 prevData = particleHash[index - 1];
        sharedHash[0] = prevData.x;
    }

    // All shared-memory writes must be visible before neighbours are read.
    __syncthreads();

    // A run starts at the very first entry or wherever the hash changes.
    const bool startsNewCell = (index == 0) || (myHash != sharedHash[tid]);
    if (startsNewCell)
    {
        cellStart[myHash] = index;
    }
}
|
||||
|
||||
|
||||
// collide two spheres using DEM method
|
||||
__device__ float3 collideSpheres(float4 posA, float4 posB,
|
||||
float4 velA, float4 velB,
|
||||
@@ -378,4 +407,217 @@ collideD(float4* newPos, float4* newVel,
|
||||
#endif
|
||||
}
|
||||
|
||||
// Axis-aligned bounding box overlap test.
// Returns 1 when the boxes [min0, max0] and [min1, max1] intersect or touch
// on all of the x, y and z axes, and 0 otherwise. The .w components are not
// examined.
__device__
uint cudaTestAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
{
    const bool hitX = (min0.x <= max1.x) && (min1.x <= max0.x);
    const bool hitY = (min0.y <= max1.y) && (min1.y <= max0.y);
    const bool hitZ = (min0.z <= max1.z) && (min1.z <= max0.z);
    return (hitX && hitY && hitZ) ? 1u : 0u;
}
|
||||
|
||||
#if 0
// Disabled earlier draft of findPairsInCell(); the surrounding #if 0 keeps it
// out of the build and it is retained for reference only.
// It walks the entries of one grid cell, counts the AABBs that overlap the
// AABB of entry `index`, and then throws the count away again (curr is reset
// to start), so pPairBuff is never written and the stored (start, curr)
// record only changes if its curr differed from start on entry.
__device__
void findPairsInCell(int3    gridPos,
                     uint    index,
                     uint2*  pParticleHash,
                     uint*   pCellStart,
                     float4* pAABB,
                     uint*   pPairBuff,
                     uint2*  pPairBuffStartCurr)
{
    // Cells outside the grid contain nothing.
    if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
        (gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
        (gridPos.z < 0) || (gridPos.z > params.gridSize.z-1))
    {
        return;
    }

    const uint gridHash = calcGridHash(gridPos);

    // First sorted entry of this cell; 0xffffffff marks an empty cell.
    const uint bucketStart = pCellStart[gridHash];
    if (bucketStart == 0xffffffff)
    {
        return;
    }

    const float4 min0 = pAABB[index*2];
    const float4 max0 = pAABB[index*2 + 1];

    const uint2 start_curr = pPairBuffStartCurr[index];
    const uint  start      = start_curr.x;
    uint        curr       = start_curr.y;

    for (uint q = 0; q < params.maxParticlesPerCell; q++)
    {
        const uint  index2   = bucketStart + q;
        const uint2 cellData = pParticleHash[index2];
        if (cellData.x != gridHash)
        {
            break;  // ran past the end of this cell's entries
        }
        if (index2 == index)
        {
            continue;  // never test an entry against itself
        }
        const float4 min1 = pAABB[index2*2];
        const float4 max1 = pAABB[index2*2 + 1];
        if (cudaTestAABBOverlap(min0, max0, min1, max1))
        {
            curr++;
        }
        // Storing index2 into pPairBuff was never enabled in this draft.
    }

    // The draft discards whatever was counted above.
    if (curr != start)
    {
        curr = start;
    }
    pPairBuffStartCurr[index] = make_uint2(start, curr);
}
#endif
|
||||
|
||||
// Update one body's persistent pair list with the overlaps found in one grid
// cell.
//
//   gridPos            : cell to examine; cells outside params.gridSize are
//                        ignored
//   index              : position of the calling body in the hash-sorted array
//   pParticleHash      : entries sorted by hash; .x = cell hash,
//                        .y = unsorted body index
//   pCellStart         : per cell hash, sorted index of the cell's first entry,
//                        or 0xffffffff when the cell is empty
//   pAABB              : two float4 (min, max) per body, addressed here by
//                        sorted position
//                        NOTE(review): this assumes pAABB is stored in sorted
//                        order - confirm against the host code.
//   pPairBuff          : storage for all per-body pair lists
//   pPairBuffStartCurr : per unsorted body index, .x = offset of the body's
//                        list in pPairBuff, .y = number of entries in it
//   numParticles       : number of entries in pParticleHash
//
// A pair that is already in the list gets BT_CUDA_PAIR_FOUND_FLG; a pair that
// is not yet in the list is appended with BT_CUDA_PAIR_NEW_FLG.
//
// NOTE(review): nothing here limits how many entries are appended; the
// capacity of each list is not visible in this function - confirm the host
// reserves enough room per body.
__device__
void findPairsInCell(int3    gridPos,
                     uint    index,
                     uint2*  pParticleHash,
                     uint*   pCellStart,
                     float4* pAABB,
                     uint*   pPairBuff,
                     uint2*  pPairBuffStartCurr,
                     uint    numParticles)
{
    // Cells outside the grid contain nothing.
    if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
        (gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
        (gridPos.z < 0) || (gridPos.z > params.gridSize.z-1))
    {
        return;
    }

    uint gridHash = calcGridHash(gridPos);

    // get start of bucket for this cell
    uint bucketStart = pCellStart[gridHash];
    if (bucketStart == 0xffffffff)
    {
        return;  // cell empty
    }

    float4 min0 = pAABB[index*2];
    float4 max0 = pAABB[index*2 + 1];

    // The pair list is keyed by the body's unsorted index so that it stays
    // with the same body from frame to frame, whatever the sort order is.
    uint2 sortedData    = pParticleHash[index];
    uint  unsorted_indx = sortedData.y;
    uint2 start_curr    = pPairBuffStartCurr[unsorted_indx];
    uint  start         = start_curr.x;
    uint  curr          = start_curr.y;

    // Never scan more than maxParticlesPerCell entries or past the array end.
    uint bucketEnd = bucketStart + params.maxParticlesPerCell;
    bucketEnd = (bucketEnd > numParticles) ? numParticles : bucketEnd;

    for (uint index2 = bucketStart; index2 < bucketEnd; index2++)
    {
        uint2 cellData = pParticleHash[index2];
        if (cellData.x != gridHash)
        {
            break;  // no longer in same bucket
        }

        // Each unordered pair is owned by the body with the larger unsorted
        // index. This also skips the body itself. Ownership must not depend
        // on the sorted position: that changes whenever bodies move between
        // cells, which would move a still-overlapping pair into the other
        // body's list and report it as both removed and newly added.
        uint unsorted_indx2 = cellData.y;
        if (unsorted_indx2 >= unsorted_indx)
        {
            continue;
        }

        float4 min1 = pAABB[index2*2];
        float4 max1 = pAABB[index2*2 + 1];
        if (!cudaTestAABBOverlap(min0, max0, min1, max1))
        {
            continue;
        }

        // Look for the pair among the entries already stored for this body.
        uint k;
        for (k = 0; k < curr; k++)
        {
            uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG);
            if (old_pair == unsorted_indx2)
            {
                pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG;
                break;
            }
        }
        if (k == curr)
        {
            // Not stored yet: append it as a new pair.
            pPairBuff[start+curr] = unsorted_indx2 | BT_CUDA_PAIR_NEW_FLG;
            curr++;
        }
    }

    pPairBuffStartCurr[unsorted_indx] = make_uint2(start, curr);
}
|
||||
|
||||
|
||||
|
||||
// For every body, search the 3x3x3 block of grid cells around the centre of
// its AABB and update the body's pair list via findPairsInCell().
//
//   pAABB              : two float4 (min, max) per body
//   pParticleHash      : hash-sorted entries, forwarded to findPairsInCell
//   pCellStart         : per-cell start offsets, forwarded to findPairsInCell
//   pPairBuff          : pair list storage, forwarded to findPairsInCell
//   pPairBuffStartCurr : per-body list descriptors, forwarded to
//                        findPairsInCell
//   numParticles       : number of bodies
//
// Launch layout: 1D grid of 1D blocks, one thread per body. The grid may be
// rounded up past numParticles; surplus threads exit immediately.
__global__ void
btCudaFindOverlappingPairsD(
    float4* pAABB,
    uint2*  pParticleHash,
    uint*   pCellStart,
    uint*   pPairBuff,
    uint2*  pPairBuffStartCurr,
    uint    numParticles
)
{
    int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;

    // Threads beyond the last body must not touch any of the arrays.
    if ((uint)index >= numParticles)
    {
        return;
    }

    float4 bbMin = pAABB[index*2];
    float4 bbMax = pAABB[index*2 + 1];
    float4 pos   = (bbMin + bbMax) * 0.5f;

    // get address in grid
    int3 gridPos = calcGridPos(pos);

    // examine only neighbouring cells
    for (int z = -1; z <= 1; z++)
    {
        for (int y = -1; y <= 1; y++)
        {
            for (int x = -1; x <= 1; x++)
            {
                findPairsInCell(gridPos + make_int3(x, y, z), index,
                                pParticleHash, pCellStart, pAABB,
                                pPairBuff, pPairBuffStartCurr, numParticles);
            }
        }
    }
}
|
||||
|
||||
|
||||
|
||||
// Count, for each body, the entries of its pair list that do not carry
// BT_CUDA_PAIR_FOUND_FLG and store the count in pPairScan[index + 1].
//
//   pPairBuff          : storage for all per-body pair lists
//   pPairBuffStartCurr : per body, .x = offset of its list in pPairBuff,
//                        .y = number of entries in the list
//   pPairScan          : output; slot index + 1 receives this body's count.
//                        Slot 0 is not written here.
//                        NOTE(review): the +1 shift presumably prepares the
//                        array for a prefix sum - confirm against the host
//                        code.
//
// Launch layout: 1D grid of 1D blocks, one thread per body. There is no tail
// guard; every launched thread reads pPairBuffStartCurr[index].
__global__ void
btCudaComputePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan)
{
    const int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;

    const uint2 range   = pPairBuffStartCurr[index];
    const uint* entries = pPairBuff + range.x;
    const uint  count   = range.y;

    uint changed = 0;
    for (uint i = 0; i < count; ++i)
    {
        if ((entries[i] & BT_CUDA_PAIR_FOUND_FLG) == 0)
        {
            ++changed;
        }
    }

    pPairScan[index + 1] = changed;
}
|
||||
|
||||
|
||||
// Emit each body's changed pairs and compact its pair list in place.
//
// For every entry of the body's list:
//   - an entry without BT_CUDA_PAIR_FOUND_FLG is copied unchanged to the
//     output, starting at pPairOut + pPairScan[index];
//   - an entry with any bit of BT_CUDA_PAIR_ANY_FLG set is kept in the list
//     with those bits cleared; all other entries are dropped.
// The list length stored in pPairBuffStartCurr[index].y is then replaced by
// the number of kept entries; the start offset is left as it was.
//
//   pPairBuff          : storage for all per-body pair lists (updated)
//   pPairBuffStartCurr : per body, .x = list offset, .y = entry count (updated)
//   pPairScan          : per body, offset of its first slot in pPairOut
//   pPairOut           : output array of changed pairs
//
// Launch layout: 1D grid of 1D blocks, one thread per body. There is no tail
// guard; every launched thread reads pPairBuffStartCurr[index].
__global__ void
btCudaSqueezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut)
{
    const int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;

    const uint2 range = pPairBuffStartCurr[index];
    const uint  start = range.x;
    const uint  count = range.y;

    uint* list    = pPairBuff + start;
    uint* changes = pPairOut + pPairScan[index];

    uint numChanges = 0;
    uint numKept    = 0;
    for (uint i = 0; i < count; ++i)
    {
        const uint entry = list[i];

        if ((entry & BT_CUDA_PAIR_FOUND_FLG) == 0)
        {
            changes[numChanges++] = entry;
        }

        if ((entry & BT_CUDA_PAIR_ANY_FLG) != 0)
        {
            // numKept never exceeds i, so this write cannot overwrite an
            // entry that has not been read yet.
            list[numKept++] = entry & (~BT_CUDA_PAIR_ANY_FLG);
        }
    }

    pPairBuffStartCurr[index] = make_uint2(start, numKept);
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user