382 lines
13 KiB
Plaintext
382 lines
13 KiB
Plaintext
/*
|
|
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
|
*
|
|
* NOTICE TO USER:
|
|
*
|
|
* This source code is subject to NVIDIA ownership rights under U.S. and
|
|
* international Copyright laws.
|
|
*
|
|
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
|
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
|
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
|
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
|
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
|
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
|
* OR PERFORMANCE OF THIS SOURCE CODE.
|
|
*
|
|
* U.S. Government End Users. This source code is a "commercial item" as
|
|
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
|
* "commercial computer software" and "commercial computer software
|
|
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
|
* and is provided to the U.S. Government only as a commercial end item.
|
|
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
|
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
|
* source code with only those rights set forth herein.
|
|
*/
|
|
|
|
/*
|
|
* Device code.
|
|
*/
|
|
|
|
#ifndef _PARTICLES_KERNEL_H_
|
|
#define _PARTICLES_KERNEL_H_
|
|
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include "cutil_math.h"
|
|
#include "math_constants.h"
|
|
#include "particles_kernel.cuh"
|
|
|
|
#if USE_TEX
|
|
// textures for particle position and velocity
|
|
texture<float4, 1, cudaReadModeElementType> oldPosTex;
|
|
texture<float4, 1, cudaReadModeElementType> oldVelTex;
|
|
|
|
texture<uint2, 1, cudaReadModeElementType> particleHashTex;
|
|
texture<uint, 1, cudaReadModeElementType> cellStartTex;
|
|
|
|
texture<uint, 1, cudaReadModeElementType> gridCountersTex;
|
|
texture<uint, 1, cudaReadModeElementType> gridCellsTex;
|
|
#endif
|
|
|
|
__constant__ SimParams params;
|
|
|
|
// integrate particle attributes
|
|
__global__ void
|
|
integrate(float4* newPos, float4* newVel,
|
|
float4* oldPos, float4* oldVel,
|
|
float deltaTime)
|
|
{
|
|
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
|
|
|
float4 pos4 = oldPos[index];
|
|
float4 vel4 = oldVel[index];
|
|
float3 pos = make_float3(pos4);
|
|
float3 vel = make_float3(vel4);
|
|
|
|
vel += params.gravity * deltaTime;
|
|
vel *= params.globalDamping;
|
|
|
|
// new position = old position + velocity * deltaTime
|
|
pos += vel * deltaTime;
|
|
|
|
// bounce off cube sides
|
|
if (pos.x > 1.0f - params.particleRadius) { pos.x = 1.0f - params.particleRadius; vel.x *= params.boundaryDamping; }
|
|
if (pos.x < -1.0f + params.particleRadius) { pos.x = -1.0f + params.particleRadius; vel.x *= params.boundaryDamping;}
|
|
if (pos.y > 1.0f - params.particleRadius) { pos.y = 1.0f - params.particleRadius; vel.y *= params.boundaryDamping; }
|
|
if (pos.y < -1.0f + params.particleRadius) { pos.y = -1.0f + params.particleRadius; vel.y *= params.boundaryDamping;}
|
|
if (pos.z > 1.0f - params.particleRadius) { pos.z = 1.0f - params.particleRadius; vel.z *= params.boundaryDamping; }
|
|
if (pos.z < -1.0f + params.particleRadius) { pos.z = -1.0f + params.particleRadius; vel.z *= params.boundaryDamping;}
|
|
|
|
// store new position and velocity
|
|
newPos[index] = make_float4(pos, pos4.w);
|
|
newVel[index] = make_float4(vel, vel4.w);
|
|
}
|
|
|
|
// calculate position in uniform grid
|
|
__device__ int3 calcGridPos(float4 p)
|
|
{
|
|
int3 gridPos;
|
|
gridPos.x = floor((p.x - params.worldOrigin.x) / params.cellSize.x);
|
|
gridPos.y = floor((p.y - params.worldOrigin.y) / params.cellSize.y);
|
|
gridPos.z = floor((p.z - params.worldOrigin.z) / params.cellSize.z);
|
|
return gridPos;
|
|
}
|
|
|
|
// calculate address in grid from position (clamping to edges)
|
|
__device__ uint calcGridHash(int3 gridPos)
|
|
{
|
|
gridPos.x = max(0, min(gridPos.x, params.gridSize.x-1));
|
|
gridPos.y = max(0, min(gridPos.y, params.gridSize.y-1));
|
|
gridPos.z = max(0, min(gridPos.z, params.gridSize.z-1));
|
|
return __mul24(__mul24(gridPos.z, params.gridSize.y), params.gridSize.x) + __mul24(gridPos.y, params.gridSize.x) + gridPos.x;
|
|
}
|
|
|
|
// add particle to cell using atomics
|
|
__device__ void addParticleToCell(int3 gridPos,
|
|
uint index,
|
|
uint* gridCounters,
|
|
uint* gridCells)
|
|
{
|
|
// calculate grid hash
|
|
uint gridHash = calcGridHash(gridPos);
|
|
|
|
// increment cell counter using atomics
|
|
#if defined CUDA_NO_SM_11_ATOMIC_INTRINSICS
|
|
int counter = 0;
|
|
#else
|
|
int counter = atomicAdd(&gridCounters[gridHash], 1); // returns previous value
|
|
counter = min(counter, params.maxParticlesPerCell-1);
|
|
#endif
|
|
|
|
// write particle index into this cell (very uncoalesced!)
|
|
gridCells[gridHash*params.maxParticlesPerCell + counter] = index;
|
|
}
|
|
|
|
|
|
// update uniform grid
|
|
__global__ void
|
|
updateGridD(float4* pos,
|
|
uint* gridCounters,
|
|
uint* gridCells)
|
|
{
|
|
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
|
float4 p = pos[index];
|
|
|
|
// get address in grid
|
|
int3 gridPos = calcGridPos(p);
|
|
|
|
addParticleToCell(gridPos, index, gridCounters, gridCells);
|
|
}
|
|
|
|
// calculate grid hash value for each particle
|
|
__global__ void
|
|
calcHashD(float4* pos,
|
|
uint2* particleHash)
|
|
{
|
|
int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
|
|
float4 p = pos[index];
|
|
|
|
// get address in grid
|
|
int3 gridPos = calcGridPos(p);
|
|
uint gridHash = calcGridHash(gridPos);
|
|
|
|
// store grid hash and particle index
|
|
particleHash[index] = make_uint2(gridHash, index);
|
|
}
|
|
|
|
// rearrange particle data into sorted order, and find the start of each cell in the
|
|
// sorted hash array
|
|
__global__ void
|
|
reorderDataAndFindCellStartD(uint2* particleHash, // particle id sorted by hash
|
|
float4* oldPos,
|
|
float4* oldVel,
|
|
float4* sortedPos,
|
|
float4* sortedVel,
|
|
uint* cellStart)
|
|
{
|
|
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
|
|
|
uint2 sortedData = particleHash[index];
|
|
|
|
// Load hash data into shared memory so that we can look
|
|
// at neighboring particle's hash value without loading
|
|
// two hash values per thread
|
|
__shared__ uint sharedHash[257];
|
|
sharedHash[threadIdx.x+1] = sortedData.x;
|
|
if (index > 0 && threadIdx.x == 0)
|
|
{
|
|
// first thread in block must load neighbor particle hash
|
|
volatile uint2 prevData = particleHash[index-1];
|
|
sharedHash[0] = prevData.x;
|
|
}
|
|
|
|
__syncthreads();
|
|
if (index == 0 || sortedData.x != sharedHash[threadIdx.x])
|
|
{
|
|
cellStart[sortedData.x] = index;
|
|
}
|
|
|
|
// Now use the sorted index to reorder the pos and vel data
|
|
float4 pos = FETCH(oldPos, sortedData.y); // macro does either global read or texture fetch
|
|
float4 vel = FETCH(oldVel, sortedData.y); // see particles_kernel.cuh
|
|
|
|
sortedPos[index] = pos;
|
|
sortedVel[index] = vel;
|
|
|
|
}
|
|
|
|
// collide two spheres using DEM method
|
|
__device__ float3 collideSpheres(float4 posA, float4 posB,
|
|
float4 velA, float4 velB,
|
|
float radiusA, float radiusB,
|
|
float attraction)
|
|
{
|
|
// calculate relative position
|
|
float3 relPos;
|
|
relPos.x = posB.x - posA.x;
|
|
relPos.y = posB.y - posA.y;
|
|
relPos.z = posB.z - posA.z;
|
|
|
|
float dist = length(relPos);
|
|
float collideDist = radiusA + radiusB;
|
|
|
|
float3 force = make_float3(0.0f);
|
|
if (dist < collideDist) {
|
|
float3 norm = relPos / dist;
|
|
|
|
// relative velocity
|
|
float3 relVel;
|
|
relVel.x = velB.x - velA.x;
|
|
relVel.y = velB.y - velA.y;
|
|
relVel.z = velB.z - velA.z;
|
|
|
|
// relative tangential velocity
|
|
float3 tanVel = relVel - (dot(relVel, norm) * norm);
|
|
|
|
// spring force
|
|
force = -params.spring*(collideDist - dist) * norm;
|
|
// dashpot (damping) force
|
|
force += params.damping*relVel;
|
|
// tangential shear force
|
|
force += params.shear*tanVel;
|
|
// attraction
|
|
force += attraction*relPos;
|
|
}
|
|
|
|
return force;
|
|
}
|
|
|
|
|
|
// collide particle with all particles in a given cell
|
|
// version using grid built with atomics
|
|
__device__
|
|
float3 collideCell(int3 gridPos,
|
|
uint index,
|
|
float4 pos,
|
|
float4 vel,
|
|
float4* oldPos,
|
|
float4* oldVel,
|
|
uint* gridCounters,
|
|
uint* gridCells)
|
|
{
|
|
float3 force = make_float3(0.0f);
|
|
|
|
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
|
|
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
|
|
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
|
|
return force;
|
|
}
|
|
|
|
uint gridHash = calcGridHash(gridPos);
|
|
|
|
// iterate over particles in this cell
|
|
uint particlesInCell = FETCH(gridCounters, gridHash);
|
|
particlesInCell = min(particlesInCell, params.maxParticlesPerCell-1);
|
|
|
|
for(uint i=0; i<particlesInCell; i++) {
|
|
uint index2 = FETCH(gridCells, gridHash*params.maxParticlesPerCell + i);
|
|
|
|
if (index2 != index) { // check not colliding with self
|
|
float4 pos2 = FETCH(oldPos, index2);
|
|
float4 vel2 = FETCH(oldVel, index2);
|
|
|
|
// collide two spheres
|
|
float3 projVec = collideSpheres(pos, pos2, vel, vel2, params.particleRadius, params.particleRadius, params.attraction);
|
|
force += projVec;
|
|
}
|
|
}
|
|
|
|
return force;
|
|
}
|
|
|
|
|
|
// version using sorted grid
|
|
__device__
|
|
float3 collideCell2(int3 gridPos,
|
|
uint index,
|
|
float4 pos,
|
|
float4 vel,
|
|
float4* oldPos,
|
|
float4* oldVel,
|
|
uint2* particleHash,
|
|
uint* cellStart)
|
|
{
|
|
float3 force = make_float3(0.0f);
|
|
|
|
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
|
|
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
|
|
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
|
|
return force;
|
|
}
|
|
|
|
uint gridHash = calcGridHash(gridPos);
|
|
|
|
// get start of bucket for this cell
|
|
uint bucketStart = FETCH(cellStart, gridHash);
|
|
if (bucketStart == 0xffffffff)
|
|
return force; // cell empty
|
|
|
|
// iterate over particles in this cell
|
|
for(uint i=0; i<params.maxParticlesPerCell; i++) {
|
|
uint index2 = bucketStart + i;
|
|
uint2 cellData = FETCH(particleHash, index2);
|
|
if (cellData.x != gridHash) break; // no longer in same bucket
|
|
|
|
if (index2 != index) { // check not colliding with self
|
|
float4 pos2 = FETCH(oldPos, index2);
|
|
float4 vel2 = FETCH(oldVel, index2);
|
|
|
|
// collide two spheres
|
|
float3 projVec = collideSpheres(pos, pos2, vel, vel2, params.particleRadius, params.particleRadius, params.attraction);
|
|
force += projVec;
|
|
}
|
|
}
|
|
|
|
return force;
|
|
}
|
|
|
|
|
|
__global__ void
|
|
collideD(float4* newPos, float4* newVel,
|
|
float4* oldPos, float4* oldVel,
|
|
#if USE_SORT
|
|
uint2* particleHash,
|
|
uint* cellStart
|
|
#else
|
|
uint* gridCounters,
|
|
uint* gridCells
|
|
#endif
|
|
)
|
|
{
|
|
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
|
|
|
// read particle data from sorted arrays
|
|
float4 pos = FETCH(oldPos, index);
|
|
float4 vel = FETCH(oldVel, index);
|
|
|
|
// get address in grid
|
|
int3 gridPos = calcGridPos(pos);
|
|
|
|
float3 force = make_float3(0.0f);
|
|
|
|
// examine only neighbouring cells
|
|
for(int z=-1; z<=1; z++) {
|
|
for(int y=-1; y<=1; y++) {
|
|
for(int x=-1; x<=1; x++) {
|
|
#if USE_SORT
|
|
force += collideCell2(gridPos + make_int3(x, y, z), index, pos, vel, oldPos, oldVel, particleHash, cellStart);
|
|
#else
|
|
force += collideCell(gridPos + make_int3(x, y, z), index, pos, vel, oldPos, oldVel, gridCounters, gridCells);
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
float3 projVec = collideSpheres(pos, params.colliderPos, vel, make_float4(0.0f, 0.0f, 0.0f, 0.0f), params.particleRadius, params.colliderRadius, 0.0f);
|
|
force += projVec;
|
|
|
|
#if USE_SORT
|
|
// write new velocity back to original unsorted location
|
|
volatile uint2 sortedData = particleHash[index];
|
|
newVel[sortedData.y] = vel + make_float4(force, 0.0f);
|
|
#else
|
|
newVel[index] = vel + make_float4(force, 0.0f);
|
|
#endif
|
|
}
|
|
|
|
#endif
|