Files
bullet3/Extras/CUDA/particles_kernel.cu

382 lines
13 KiB
Plaintext

/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/*
* Device code.
*/
#ifndef _PARTICLES_KERNEL_H_
#define _PARTICLES_KERNEL_H_
#include <stdio.h>
#include <math.h>
#include "cutil_math.h"
#include "math_constants.h"
#include "particles_kernel.cuh"
#if USE_TEX
// textures for particle position and velocity
texture<float4, 1, cudaReadModeElementType> oldPosTex;
texture<float4, 1, cudaReadModeElementType> oldVelTex;
texture<uint2, 1, cudaReadModeElementType> particleHashTex;
texture<uint, 1, cudaReadModeElementType> cellStartTex;
texture<uint, 1, cudaReadModeElementType> gridCountersTex;
texture<uint, 1, cudaReadModeElementType> gridCellsTex;
#endif
__constant__ SimParams params;
// integrate particle attributes
__global__ void
integrate(float4* newPos, float4* newVel,
float4* oldPos, float4* oldVel,
float deltaTime)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
float4 pos4 = oldPos[index];
float4 vel4 = oldVel[index];
float3 pos = make_float3(pos4);
float3 vel = make_float3(vel4);
vel += params.gravity * deltaTime;
vel *= params.globalDamping;
// new position = old position + velocity * deltaTime
pos += vel * deltaTime;
// bounce off cube sides
if (pos.x > 1.0f - params.particleRadius) { pos.x = 1.0f - params.particleRadius; vel.x *= params.boundaryDamping; }
if (pos.x < -1.0f + params.particleRadius) { pos.x = -1.0f + params.particleRadius; vel.x *= params.boundaryDamping;}
if (pos.y > 1.0f - params.particleRadius) { pos.y = 1.0f - params.particleRadius; vel.y *= params.boundaryDamping; }
if (pos.y < -1.0f + params.particleRadius) { pos.y = -1.0f + params.particleRadius; vel.y *= params.boundaryDamping;}
if (pos.z > 1.0f - params.particleRadius) { pos.z = 1.0f - params.particleRadius; vel.z *= params.boundaryDamping; }
if (pos.z < -1.0f + params.particleRadius) { pos.z = -1.0f + params.particleRadius; vel.z *= params.boundaryDamping;}
// store new position and velocity
newPos[index] = make_float4(pos, pos4.w);
newVel[index] = make_float4(vel, vel4.w);
}
// calculate position in uniform grid
__device__ int3 calcGridPos(float4 p)
{
int3 gridPos;
gridPos.x = floor((p.x - params.worldOrigin.x) / params.cellSize.x);
gridPos.y = floor((p.y - params.worldOrigin.y) / params.cellSize.y);
gridPos.z = floor((p.z - params.worldOrigin.z) / params.cellSize.z);
return gridPos;
}
// calculate address in grid from position (clamping to edges)
__device__ uint calcGridHash(int3 gridPos)
{
gridPos.x = max(0, min(gridPos.x, params.gridSize.x-1));
gridPos.y = max(0, min(gridPos.y, params.gridSize.y-1));
gridPos.z = max(0, min(gridPos.z, params.gridSize.z-1));
return __mul24(__mul24(gridPos.z, params.gridSize.y), params.gridSize.x) + __mul24(gridPos.y, params.gridSize.x) + gridPos.x;
}
// add particle to cell using atomics
__device__ void addParticleToCell(int3 gridPos,
uint index,
uint* gridCounters,
uint* gridCells)
{
// calculate grid hash
uint gridHash = calcGridHash(gridPos);
// increment cell counter using atomics
#if defined CUDA_NO_SM_11_ATOMIC_INTRINSICS
int counter = 0;
#else
int counter = atomicAdd(&gridCounters[gridHash], 1); // returns previous value
counter = min(counter, params.maxParticlesPerCell-1);
#endif
// write particle index into this cell (very uncoalesced!)
gridCells[gridHash*params.maxParticlesPerCell + counter] = index;
}
// update uniform grid
__global__ void
updateGridD(float4* pos,
uint* gridCounters,
uint* gridCells)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
float4 p = pos[index];
// get address in grid
int3 gridPos = calcGridPos(p);
addParticleToCell(gridPos, index, gridCounters, gridCells);
}
// calculate grid hash value for each particle
__global__ void
calcHashD(float4* pos,
uint2* particleHash)
{
int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
float4 p = pos[index];
// get address in grid
int3 gridPos = calcGridPos(p);
uint gridHash = calcGridHash(gridPos);
// store grid hash and particle index
particleHash[index] = make_uint2(gridHash, index);
}
// rearrange particle data into sorted order, and find the start of each cell in the
// sorted hash array
__global__ void
reorderDataAndFindCellStartD(uint2* particleHash, // particle id sorted by hash
float4* oldPos,
float4* oldVel,
float4* sortedPos,
float4* sortedVel,
uint* cellStart)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
uint2 sortedData = particleHash[index];
// Load hash data into shared memory so that we can look
// at neighboring particle's hash value without loading
// two hash values per thread
__shared__ uint sharedHash[257];
sharedHash[threadIdx.x+1] = sortedData.x;
if (index > 0 && threadIdx.x == 0)
{
// first thread in block must load neighbor particle hash
volatile uint2 prevData = particleHash[index-1];
sharedHash[0] = prevData.x;
}
__syncthreads();
if (index == 0 || sortedData.x != sharedHash[threadIdx.x])
{
cellStart[sortedData.x] = index;
}
// Now use the sorted index to reorder the pos and vel data
float4 pos = FETCH(oldPos, sortedData.y); // macro does either global read or texture fetch
float4 vel = FETCH(oldVel, sortedData.y); // see particles_kernel.cuh
sortedPos[index] = pos;
sortedVel[index] = vel;
}
// collide two spheres using DEM method
__device__ float3 collideSpheres(float4 posA, float4 posB,
float4 velA, float4 velB,
float radiusA, float radiusB,
float attraction)
{
// calculate relative position
float3 relPos;
relPos.x = posB.x - posA.x;
relPos.y = posB.y - posA.y;
relPos.z = posB.z - posA.z;
float dist = length(relPos);
float collideDist = radiusA + radiusB;
float3 force = make_float3(0.0f);
if (dist < collideDist) {
float3 norm = relPos / dist;
// relative velocity
float3 relVel;
relVel.x = velB.x - velA.x;
relVel.y = velB.y - velA.y;
relVel.z = velB.z - velA.z;
// relative tangential velocity
float3 tanVel = relVel - (dot(relVel, norm) * norm);
// spring force
force = -params.spring*(collideDist - dist) * norm;
// dashpot (damping) force
force += params.damping*relVel;
// tangential shear force
force += params.shear*tanVel;
// attraction
force += attraction*relPos;
}
return force;
}
// collide particle with all particles in a given cell
// version using grid built with atomics
__device__
float3 collideCell(int3 gridPos,
uint index,
float4 pos,
float4 vel,
float4* oldPos,
float4* oldVel,
uint* gridCounters,
uint* gridCells)
{
float3 force = make_float3(0.0f);
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
return force;
}
uint gridHash = calcGridHash(gridPos);
// iterate over particles in this cell
uint particlesInCell = FETCH(gridCounters, gridHash);
particlesInCell = min(particlesInCell, params.maxParticlesPerCell-1);
for(uint i=0; i<particlesInCell; i++) {
uint index2 = FETCH(gridCells, gridHash*params.maxParticlesPerCell + i);
if (index2 != index) { // check not colliding with self
float4 pos2 = FETCH(oldPos, index2);
float4 vel2 = FETCH(oldVel, index2);
// collide two spheres
float3 projVec = collideSpheres(pos, pos2, vel, vel2, params.particleRadius, params.particleRadius, params.attraction);
force += projVec;
}
}
return force;
}
// version using sorted grid
__device__
float3 collideCell2(int3 gridPos,
uint index,
float4 pos,
float4 vel,
float4* oldPos,
float4* oldVel,
uint2* particleHash,
uint* cellStart)
{
float3 force = make_float3(0.0f);
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
return force;
}
uint gridHash = calcGridHash(gridPos);
// get start of bucket for this cell
uint bucketStart = FETCH(cellStart, gridHash);
if (bucketStart == 0xffffffff)
return force; // cell empty
// iterate over particles in this cell
for(uint i=0; i<params.maxParticlesPerCell; i++) {
uint index2 = bucketStart + i;
uint2 cellData = FETCH(particleHash, index2);
if (cellData.x != gridHash) break; // no longer in same bucket
if (index2 != index) { // check not colliding with self
float4 pos2 = FETCH(oldPos, index2);
float4 vel2 = FETCH(oldVel, index2);
// collide two spheres
float3 projVec = collideSpheres(pos, pos2, vel, vel2, params.particleRadius, params.particleRadius, params.attraction);
force += projVec;
}
}
return force;
}
__global__ void
collideD(float4* newPos, float4* newVel,
float4* oldPos, float4* oldVel,
#if USE_SORT
uint2* particleHash,
uint* cellStart
#else
uint* gridCounters,
uint* gridCells
#endif
)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
// read particle data from sorted arrays
float4 pos = FETCH(oldPos, index);
float4 vel = FETCH(oldVel, index);
// get address in grid
int3 gridPos = calcGridPos(pos);
float3 force = make_float3(0.0f);
// examine only neighbouring cells
for(int z=-1; z<=1; z++) {
for(int y=-1; y<=1; y++) {
for(int x=-1; x<=1; x++) {
#if USE_SORT
force += collideCell2(gridPos + make_int3(x, y, z), index, pos, vel, oldPos, oldVel, particleHash, cellStart);
#else
force += collideCell(gridPos + make_int3(x, y, z), index, pos, vel, oldPos, oldVel, gridCounters, gridCells);
#endif
}
}
}
float3 projVec = collideSpheres(pos, params.colliderPos, vel, make_float4(0.0f, 0.0f, 0.0f, 0.0f), params.particleRadius, params.colliderRadius, 0.0f);
force += projVec;
#if USE_SORT
// write new velocity back to original unsorted location
volatile uint2 sortedData = particleHash[index];
newVel[sortedData.y] = vel + make_float4(force, 0.0f);
#else
newVel[index] = vel + make_float4(force, 0.0f);
#endif
}
#endif