fix: some file didn't have the svn:eol-style native yet
This commit is contained in:
@@ -1,402 +1,402 @@
|
||||
/*
|
||||
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
|
||||
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
|
||||
|
||||
ZLib license
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef _PARTICLES_KERNEL_H_
|
||||
#define _PARTICLES_KERNEL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "fluid_system_host.cuh"
|
||||
|
||||
#define TOTAL_THREADS 65536
|
||||
#define BLOCK_THREADS 256
|
||||
#define MAX_NBR 80
|
||||
|
||||
__constant__ FluidParams simData; // simulation data (on device)
|
||||
|
||||
__device__ int bufNeighbor[ TOTAL_THREADS*MAX_NBR ];
|
||||
__device__ float bufNdist[ TOTAL_THREADS*MAX_NBR ];
|
||||
|
||||
#define COLOR(r,g,b) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) )
|
||||
#define COLORA(r,g,b,a) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) | uint((a)*255.0f) )
|
||||
|
||||
#define NULL_HASH 333333
|
||||
|
||||
#define OFFSET_CLR 12
|
||||
#define OFFSET_NEXT 16
|
||||
#define OFFSET_VEL 20
|
||||
#define OFFSET_VEVAL 32
|
||||
#define OFFSET_PRESS 48
|
||||
#define OFFSET_DENS 52
|
||||
#define OFFSET_FORCE 56
|
||||
|
||||
|
||||
__global__ void hashParticles ( char* bufPnts, uint2* bufHash, int numPnt )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
float3* pos = (float3*) (bufPnts + __mul24(ndx, simData.stride) );
|
||||
int gz = (pos->z - simData.min.z) * simData.delta.z ;
|
||||
int gy = (pos->y - simData.min.y) * simData.delta.y ;
|
||||
int gx = (pos->x - simData.min.x) * simData.delta.x ;
|
||||
if ( ndx >= numPnt || gx < 0 || gz > simData.res.x-1 || gy < 0 || gy > simData.res.y-1 || gz < 0 || gz > simData.res.z-1 )
|
||||
bufHash[ndx] = make_uint2( NULL_HASH, ndx );
|
||||
else
|
||||
bufHash[ndx] = make_uint2( __mul24(__mul24(gz, (int) simData.res.y)+gy, (int) simData.res.x) + gx, ndx );
|
||||
|
||||
__syncthreads ();
|
||||
}
|
||||
|
||||
__global__ void insertParticles ( char* bufPnts, uint2* bufHash, int* bufGrid, int numPnt, int numGrid )
|
||||
{
|
||||
uint grid_ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // grid cell index
|
||||
|
||||
bufPnts += OFFSET_NEXT;
|
||||
bufGrid[grid_ndx] = -1;
|
||||
for (int n=0; n < numPnt; n++) {
|
||||
if ( bufHash[n].x == grid_ndx ) {
|
||||
*(int*) (bufPnts + __mul24(bufHash[n].y, simData.stride)) = bufGrid[grid_ndx];
|
||||
bufGrid[grid_ndx] = bufHash[n].y;
|
||||
}
|
||||
}
|
||||
__syncthreads ();
|
||||
}
|
||||
|
||||
__global__ void insertParticlesRadix ( char* bufPnts, uint2* bufHash, int* bufGrid, char* bufPntSort, int numPnt, int numGrid )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
|
||||
uint2 bufHashSort = bufHash[ndx];
|
||||
|
||||
__shared__ uint sharedHash[257];
|
||||
sharedHash[threadIdx.x+1] = bufHashSort.x;
|
||||
if ( ndx > 0 && threadIdx.x == 0 ) {
|
||||
volatile uint2 prevData = bufHash[ndx-1];
|
||||
sharedHash[0] = prevData.x;
|
||||
}
|
||||
__syncthreads ();
|
||||
|
||||
if ( (ndx == 0 || bufHashSort.x != sharedHash[threadIdx.x]) && bufHashSort.x != NULL_HASH ) {
|
||||
bufGrid [ bufHashSort.x ] = ndx;
|
||||
}
|
||||
if ( ndx < numPnt ) {
|
||||
char* src = bufPnts + __mul24( bufHashSort.y, simData.stride );
|
||||
char* dest = bufPntSort + __mul24( ndx, simData.stride );
|
||||
|
||||
*(float3*)(dest) = *(float3*)(src);
|
||||
*(uint*) (dest + OFFSET_CLR) = *(uint*) (src + OFFSET_CLR);
|
||||
*(float3*)(dest + OFFSET_VEL) = *(float3*)(src + OFFSET_VEL);
|
||||
*(float3*)(dest + OFFSET_VEVAL) = *(float3*)(src + OFFSET_VEVAL);
|
||||
|
||||
*(float*) (dest + OFFSET_DENS) = 0.0;
|
||||
*(float*) (dest + OFFSET_PRESS) = 0.0;
|
||||
*(float3*) (dest + OFFSET_FORCE)= make_float3(0,0,0);
|
||||
*(int*) (dest + OFFSET_NEXT) = bufHashSort.x;
|
||||
}
|
||||
|
||||
__syncthreads ();
|
||||
|
||||
}
|
||||
|
||||
//__shared__ int ncount [ BLOCK_THREADS ];
|
||||
|
||||
__device__ float contributePressure ( int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
|
||||
{
|
||||
float3* qpos;
|
||||
float3 dist;
|
||||
float dsq, c, sum;
|
||||
float d = simData.sim_scale;
|
||||
int nbr = __mul24(pndx, MAX_NBR);
|
||||
|
||||
sum = 0.0;
|
||||
for ( ; qndx < simData.pnts; qndx++ ) {
|
||||
|
||||
if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;
|
||||
|
||||
if ( qndx != pndx ) {
|
||||
qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));
|
||||
|
||||
dist.x = ( p->x - qpos->x )*d; // dist in cm
|
||||
dist.y = ( p->y - qpos->y )*d;
|
||||
dist.z = ( p->z - qpos->z )*d;
|
||||
dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
|
||||
if ( dsq < simData.r2 ) {
|
||||
c = simData.r2 - dsq;
|
||||
sum += c * c * c;
|
||||
if ( bufNeighbor[nbr] < MAX_NBR ) {
|
||||
bufNeighbor[ nbr+bufNeighbor[nbr] ] = qndx;
|
||||
bufNdist[ nbr+bufNeighbor[nbr] ] = sqrt(dsq);
|
||||
bufNeighbor[nbr]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
//curr = *(int*) (bufPnts + __mul24(curr, simData.stride) + OFFSET_NEXT);
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
/*if ( ncount[threadIdx.x] < MAX_NBR ) {
|
||||
bufNeighbor [ nbr + ncount[threadIdx.x] ] = curr;
|
||||
bufNdist [ nbr + ncount[threadIdx.x] ] = sqrt(dsq);
|
||||
ncount[threadIdx.x]++;
|
||||
}*/
|
||||
|
||||
__global__ void computePressure ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
|
||||
//if ( ndx < 1024 ) {
|
||||
|
||||
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
||||
|
||||
// Find 2x2x2 grid cells
|
||||
// - Use registers only, no arrays (local-memory too slow)
|
||||
int3 cell;
|
||||
int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
|
||||
float gs = simData.smooth_rad / simData.sim_scale;
|
||||
|
||||
cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
|
||||
cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
|
||||
cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
|
||||
gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
|
||||
gc1 = gc0 + 1;
|
||||
gc2 = gc0 + simData.res.x;
|
||||
gc3 = gc2 + 1;
|
||||
if ( cell.z+1 < simData.res.z ) {
|
||||
gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
|
||||
gc5 = gc4 + 1;
|
||||
gc6 = gc4 + simData.res.x;
|
||||
gc7 = gc6 + 1;
|
||||
}
|
||||
if ( cell.x+1 >= simData.res.x ) {
|
||||
gc1 = -1; gc3 = -1;
|
||||
gc5 = -1; gc7 = -1;
|
||||
}
|
||||
if ( cell.y+1 >= simData.res.y ) {
|
||||
gc2 = -1; gc3 = -1;
|
||||
gc6 = -1; gc7 = -1;
|
||||
}
|
||||
// Sum Pressure
|
||||
float sum = 0.0;
|
||||
bufNeighbor[ __mul24(ndx, MAX_NBR) ] = 1;
|
||||
if (gc0 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
|
||||
if (gc1 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
|
||||
if (gc2 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
|
||||
if (gc3 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
|
||||
if (gc4 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
|
||||
if (gc5 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
|
||||
if (gc6 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
|
||||
if (gc7 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );
|
||||
|
||||
// Compute Density & Pressure
|
||||
sum = sum * simData.pmass * simData.poly6kern;
|
||||
if ( sum == 0.0 ) sum = 1.0;
|
||||
*(float*) ((char*)pos + OFFSET_PRESS) = ( sum - simData.rest_dens ) * simData.stiffness;
|
||||
*(float*) ((char*)pos + OFFSET_DENS) = 1.0f / sum;
|
||||
|
||||
//}
|
||||
//__syncthreads ();
|
||||
}
|
||||
|
||||
__device__ void contributeForce ( float3& force, int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
|
||||
{
|
||||
float press = *(float*) ((char*)p + OFFSET_PRESS);
|
||||
float dens = *(float*) ((char*)p + OFFSET_DENS);
|
||||
float3 veval = *(float3*) ((char*)p + OFFSET_VEVAL );
|
||||
float3 qeval, dist;
|
||||
float c, ndistj, dsq;
|
||||
float pterm, dterm, vterm;
|
||||
float3* qpos;
|
||||
float d = simData.sim_scale;
|
||||
|
||||
vterm = simData.lapkern * simData.visc;
|
||||
|
||||
for ( ; qndx < simData.pnts; qndx++ ) {
|
||||
|
||||
if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;
|
||||
|
||||
if ( qndx != pndx ) {
|
||||
qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));
|
||||
|
||||
dist.x = ( p->x - qpos->x )*d; // dist in cm
|
||||
dist.y = ( p->y - qpos->y )*d;
|
||||
dist.z = ( p->z - qpos->z )*d;
|
||||
dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
|
||||
if ( dsq < simData.r2 ) {
|
||||
ndistj = sqrt(dsq);
|
||||
c = ( simData.smooth_rad - ndistj );
|
||||
dist.x = ( p->x - qpos->x )*d; // dist in cm
|
||||
dist.y = ( p->y - qpos->y )*d;
|
||||
dist.z = ( p->z - qpos->z )*d;
|
||||
pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
|
||||
dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
|
||||
qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
|
||||
force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
|
||||
force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
|
||||
force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void computeForce ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
|
||||
//if ( ndx < numPnt ) {
|
||||
|
||||
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
||||
|
||||
// Find 2x2x2 grid cells
|
||||
// - Use registers only, no arrays (local-memory too slow)
|
||||
int3 cell;
|
||||
int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
|
||||
float gs = simData.smooth_rad / simData.sim_scale;
|
||||
|
||||
cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
|
||||
cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
|
||||
cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
|
||||
gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
|
||||
gc1 = gc0 + 1;
|
||||
gc2 = gc0 + simData.res.x;
|
||||
gc3 = gc2 + 1;
|
||||
if ( cell.z+1 < simData.res.z ) {
|
||||
gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
|
||||
gc5 = gc4 + 1;
|
||||
gc6 = gc4 + simData.res.x;
|
||||
gc7 = gc6 + 1;
|
||||
}
|
||||
if ( cell.x+1 >= simData.res.x ) {
|
||||
gc1 = -1; gc3 = -1;
|
||||
gc5 = -1; gc7 = -1;
|
||||
}
|
||||
if ( cell.y+1 >= simData.res.y ) {
|
||||
gc2 = -1; gc3 = -1;
|
||||
gc6 = -1; gc7 = -1;
|
||||
}
|
||||
// Sum Pressure
|
||||
float3 force = make_float3(0,0,0);
|
||||
if (gc0 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
|
||||
if (gc1 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
|
||||
if (gc2 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
|
||||
if (gc3 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
|
||||
if (gc4 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
|
||||
if (gc5 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
|
||||
if (gc6 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
|
||||
if (gc7 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );
|
||||
|
||||
// Update Force
|
||||
*(float3*) ((char*)pos + OFFSET_FORCE ) = force;
|
||||
|
||||
//}
|
||||
//__syncthreads ();
|
||||
}
|
||||
|
||||
|
||||
__global__ void computeForceNbr ( char* bufPntSort, int numPnt )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
|
||||
if ( ndx < numPnt ) {
|
||||
|
||||
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
||||
|
||||
float3* qpos;
|
||||
float press = *(float*) ((char*)pos + OFFSET_PRESS);
|
||||
float dens = *(float*) ((char*)pos + OFFSET_DENS);
|
||||
float3 veval = *(float3*) ((char*)pos + OFFSET_VEVAL );
|
||||
float3 qeval, dist, force;
|
||||
float d = simData.sim_scale;
|
||||
float c, ndistj;
|
||||
float pterm, dterm, vterm;
|
||||
vterm = simData.lapkern * simData.visc;
|
||||
int nbr = __mul24(ndx, MAX_NBR);
|
||||
|
||||
int ncnt = bufNeighbor[ nbr ];
|
||||
|
||||
force = make_float3(0,0,0);
|
||||
for (int j=1; j < ncnt; j++) { // base 1, n[0] = count
|
||||
ndistj = bufNdist[ nbr+j ];
|
||||
qpos = (float3*) (bufPntSort + __mul24( bufNeighbor[ nbr+j ], simData.stride) );
|
||||
c = ( simData.smooth_rad - ndistj );
|
||||
dist.x = ( pos->x - qpos->x )*d; // dist in cm
|
||||
dist.y = ( pos->y - qpos->y )*d;
|
||||
dist.z = ( pos->z - qpos->z )*d;
|
||||
pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
|
||||
dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
|
||||
qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
|
||||
force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
|
||||
force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
|
||||
force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
|
||||
}
|
||||
*(float3*) ((char*)pos + OFFSET_FORCE ) = force;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
__global__ void advanceParticles ( char* bufPntSort, int numPnt, float dt, float ss )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
|
||||
if ( ndx < numPnt ) {
|
||||
|
||||
// Get particle vars
|
||||
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
||||
float3* vel = (float3*) ((char*)pos + OFFSET_VEL );
|
||||
float3* vel_eval = (float3*) ((char*)pos + OFFSET_VEVAL );
|
||||
float3 accel = *(float3*) ((char*)pos + OFFSET_FORCE );
|
||||
float3 vcurr, vnext;
|
||||
|
||||
// Leapfrog integration
|
||||
accel.x *= 0.00020543; // NOTE - To do: SPH_PMASS should be passed in
|
||||
accel.y *= 0.00020543;
|
||||
accel.z *= 0.00020543;
|
||||
accel.z -= 9.8;
|
||||
|
||||
vcurr = *vel;
|
||||
vnext.x = accel.x*dt + vcurr.x;
|
||||
vnext.y = accel.y*dt + vcurr.y;
|
||||
vnext.z = accel.z*dt + vcurr.z; // v(t+1/2) = v(t-1/2) + a(t) dt
|
||||
|
||||
accel.x = (vcurr.x + vnext.x) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
|
||||
accel.y = (vcurr.y + vnext.y) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
|
||||
accel.z = (vcurr.z + vnext.z) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
|
||||
|
||||
*vel_eval = accel;
|
||||
*vel = vnext;
|
||||
|
||||
dt /= simData.sim_scale;
|
||||
vnext.x = pos->x + vnext.x*dt;
|
||||
vnext.y = pos->y + vnext.y*dt;
|
||||
vnext.z = pos->z + vnext.z*dt;
|
||||
*pos = vnext; // p(t+1) = p(t) + v(t+1/2) dt
|
||||
}
|
||||
|
||||
__syncthreads ();
|
||||
}
|
||||
|
||||
#endif
|
||||
/*
|
||||
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
|
||||
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
|
||||
|
||||
ZLib license
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef _PARTICLES_KERNEL_H_
|
||||
#define _PARTICLES_KERNEL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "fluid_system_host.cuh"
|
||||
|
||||
#define TOTAL_THREADS 65536
|
||||
#define BLOCK_THREADS 256
|
||||
#define MAX_NBR 80
|
||||
|
||||
__constant__ FluidParams simData; // simulation data (on device)
|
||||
|
||||
__device__ int bufNeighbor[ TOTAL_THREADS*MAX_NBR ];
|
||||
__device__ float bufNdist[ TOTAL_THREADS*MAX_NBR ];
|
||||
|
||||
#define COLOR(r,g,b) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) )
|
||||
#define COLORA(r,g,b,a) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) | uint((a)*255.0f) )
|
||||
|
||||
#define NULL_HASH 333333
|
||||
|
||||
#define OFFSET_CLR 12
|
||||
#define OFFSET_NEXT 16
|
||||
#define OFFSET_VEL 20
|
||||
#define OFFSET_VEVAL 32
|
||||
#define OFFSET_PRESS 48
|
||||
#define OFFSET_DENS 52
|
||||
#define OFFSET_FORCE 56
|
||||
|
||||
|
||||
__global__ void hashParticles ( char* bufPnts, uint2* bufHash, int numPnt )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
float3* pos = (float3*) (bufPnts + __mul24(ndx, simData.stride) );
|
||||
int gz = (pos->z - simData.min.z) * simData.delta.z ;
|
||||
int gy = (pos->y - simData.min.y) * simData.delta.y ;
|
||||
int gx = (pos->x - simData.min.x) * simData.delta.x ;
|
||||
if ( ndx >= numPnt || gx < 0 || gz > simData.res.x-1 || gy < 0 || gy > simData.res.y-1 || gz < 0 || gz > simData.res.z-1 )
|
||||
bufHash[ndx] = make_uint2( NULL_HASH, ndx );
|
||||
else
|
||||
bufHash[ndx] = make_uint2( __mul24(__mul24(gz, (int) simData.res.y)+gy, (int) simData.res.x) + gx, ndx );
|
||||
|
||||
__syncthreads ();
|
||||
}
|
||||
|
||||
__global__ void insertParticles ( char* bufPnts, uint2* bufHash, int* bufGrid, int numPnt, int numGrid )
|
||||
{
|
||||
uint grid_ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // grid cell index
|
||||
|
||||
bufPnts += OFFSET_NEXT;
|
||||
bufGrid[grid_ndx] = -1;
|
||||
for (int n=0; n < numPnt; n++) {
|
||||
if ( bufHash[n].x == grid_ndx ) {
|
||||
*(int*) (bufPnts + __mul24(bufHash[n].y, simData.stride)) = bufGrid[grid_ndx];
|
||||
bufGrid[grid_ndx] = bufHash[n].y;
|
||||
}
|
||||
}
|
||||
__syncthreads ();
|
||||
}
|
||||
|
||||
__global__ void insertParticlesRadix ( char* bufPnts, uint2* bufHash, int* bufGrid, char* bufPntSort, int numPnt, int numGrid )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
|
||||
uint2 bufHashSort = bufHash[ndx];
|
||||
|
||||
__shared__ uint sharedHash[257];
|
||||
sharedHash[threadIdx.x+1] = bufHashSort.x;
|
||||
if ( ndx > 0 && threadIdx.x == 0 ) {
|
||||
volatile uint2 prevData = bufHash[ndx-1];
|
||||
sharedHash[0] = prevData.x;
|
||||
}
|
||||
__syncthreads ();
|
||||
|
||||
if ( (ndx == 0 || bufHashSort.x != sharedHash[threadIdx.x]) && bufHashSort.x != NULL_HASH ) {
|
||||
bufGrid [ bufHashSort.x ] = ndx;
|
||||
}
|
||||
if ( ndx < numPnt ) {
|
||||
char* src = bufPnts + __mul24( bufHashSort.y, simData.stride );
|
||||
char* dest = bufPntSort + __mul24( ndx, simData.stride );
|
||||
|
||||
*(float3*)(dest) = *(float3*)(src);
|
||||
*(uint*) (dest + OFFSET_CLR) = *(uint*) (src + OFFSET_CLR);
|
||||
*(float3*)(dest + OFFSET_VEL) = *(float3*)(src + OFFSET_VEL);
|
||||
*(float3*)(dest + OFFSET_VEVAL) = *(float3*)(src + OFFSET_VEVAL);
|
||||
|
||||
*(float*) (dest + OFFSET_DENS) = 0.0;
|
||||
*(float*) (dest + OFFSET_PRESS) = 0.0;
|
||||
*(float3*) (dest + OFFSET_FORCE)= make_float3(0,0,0);
|
||||
*(int*) (dest + OFFSET_NEXT) = bufHashSort.x;
|
||||
}
|
||||
|
||||
__syncthreads ();
|
||||
|
||||
}
|
||||
|
||||
//__shared__ int ncount [ BLOCK_THREADS ];
|
||||
|
||||
__device__ float contributePressure ( int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
|
||||
{
|
||||
float3* qpos;
|
||||
float3 dist;
|
||||
float dsq, c, sum;
|
||||
float d = simData.sim_scale;
|
||||
int nbr = __mul24(pndx, MAX_NBR);
|
||||
|
||||
sum = 0.0;
|
||||
for ( ; qndx < simData.pnts; qndx++ ) {
|
||||
|
||||
if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;
|
||||
|
||||
if ( qndx != pndx ) {
|
||||
qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));
|
||||
|
||||
dist.x = ( p->x - qpos->x )*d; // dist in cm
|
||||
dist.y = ( p->y - qpos->y )*d;
|
||||
dist.z = ( p->z - qpos->z )*d;
|
||||
dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
|
||||
if ( dsq < simData.r2 ) {
|
||||
c = simData.r2 - dsq;
|
||||
sum += c * c * c;
|
||||
if ( bufNeighbor[nbr] < MAX_NBR ) {
|
||||
bufNeighbor[ nbr+bufNeighbor[nbr] ] = qndx;
|
||||
bufNdist[ nbr+bufNeighbor[nbr] ] = sqrt(dsq);
|
||||
bufNeighbor[nbr]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
//curr = *(int*) (bufPnts + __mul24(curr, simData.stride) + OFFSET_NEXT);
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
/*if ( ncount[threadIdx.x] < MAX_NBR ) {
|
||||
bufNeighbor [ nbr + ncount[threadIdx.x] ] = curr;
|
||||
bufNdist [ nbr + ncount[threadIdx.x] ] = sqrt(dsq);
|
||||
ncount[threadIdx.x]++;
|
||||
}*/
|
||||
|
||||
__global__ void computePressure ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
|
||||
//if ( ndx < 1024 ) {
|
||||
|
||||
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
||||
|
||||
// Find 2x2x2 grid cells
|
||||
// - Use registers only, no arrays (local-memory too slow)
|
||||
int3 cell;
|
||||
int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
|
||||
float gs = simData.smooth_rad / simData.sim_scale;
|
||||
|
||||
cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
|
||||
cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
|
||||
cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
|
||||
gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
|
||||
gc1 = gc0 + 1;
|
||||
gc2 = gc0 + simData.res.x;
|
||||
gc3 = gc2 + 1;
|
||||
if ( cell.z+1 < simData.res.z ) {
|
||||
gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
|
||||
gc5 = gc4 + 1;
|
||||
gc6 = gc4 + simData.res.x;
|
||||
gc7 = gc6 + 1;
|
||||
}
|
||||
if ( cell.x+1 >= simData.res.x ) {
|
||||
gc1 = -1; gc3 = -1;
|
||||
gc5 = -1; gc7 = -1;
|
||||
}
|
||||
if ( cell.y+1 >= simData.res.y ) {
|
||||
gc2 = -1; gc3 = -1;
|
||||
gc6 = -1; gc7 = -1;
|
||||
}
|
||||
// Sum Pressure
|
||||
float sum = 0.0;
|
||||
bufNeighbor[ __mul24(ndx, MAX_NBR) ] = 1;
|
||||
if (gc0 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
|
||||
if (gc1 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
|
||||
if (gc2 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
|
||||
if (gc3 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
|
||||
if (gc4 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
|
||||
if (gc5 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
|
||||
if (gc6 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
|
||||
if (gc7 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );
|
||||
|
||||
// Compute Density & Pressure
|
||||
sum = sum * simData.pmass * simData.poly6kern;
|
||||
if ( sum == 0.0 ) sum = 1.0;
|
||||
*(float*) ((char*)pos + OFFSET_PRESS) = ( sum - simData.rest_dens ) * simData.stiffness;
|
||||
*(float*) ((char*)pos + OFFSET_DENS) = 1.0f / sum;
|
||||
|
||||
//}
|
||||
//__syncthreads ();
|
||||
}
|
||||
|
||||
__device__ void contributeForce ( float3& force, int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
|
||||
{
|
||||
float press = *(float*) ((char*)p + OFFSET_PRESS);
|
||||
float dens = *(float*) ((char*)p + OFFSET_DENS);
|
||||
float3 veval = *(float3*) ((char*)p + OFFSET_VEVAL );
|
||||
float3 qeval, dist;
|
||||
float c, ndistj, dsq;
|
||||
float pterm, dterm, vterm;
|
||||
float3* qpos;
|
||||
float d = simData.sim_scale;
|
||||
|
||||
vterm = simData.lapkern * simData.visc;
|
||||
|
||||
for ( ; qndx < simData.pnts; qndx++ ) {
|
||||
|
||||
if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;
|
||||
|
||||
if ( qndx != pndx ) {
|
||||
qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));
|
||||
|
||||
dist.x = ( p->x - qpos->x )*d; // dist in cm
|
||||
dist.y = ( p->y - qpos->y )*d;
|
||||
dist.z = ( p->z - qpos->z )*d;
|
||||
dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
|
||||
if ( dsq < simData.r2 ) {
|
||||
ndistj = sqrt(dsq);
|
||||
c = ( simData.smooth_rad - ndistj );
|
||||
dist.x = ( p->x - qpos->x )*d; // dist in cm
|
||||
dist.y = ( p->y - qpos->y )*d;
|
||||
dist.z = ( p->z - qpos->z )*d;
|
||||
pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
|
||||
dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
|
||||
qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
|
||||
force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
|
||||
force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
|
||||
force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void computeForce ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
|
||||
//if ( ndx < numPnt ) {
|
||||
|
||||
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
||||
|
||||
// Find 2x2x2 grid cells
|
||||
// - Use registers only, no arrays (local-memory too slow)
|
||||
int3 cell;
|
||||
int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
|
||||
float gs = simData.smooth_rad / simData.sim_scale;
|
||||
|
||||
cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
|
||||
cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
|
||||
cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
|
||||
gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
|
||||
gc1 = gc0 + 1;
|
||||
gc2 = gc0 + simData.res.x;
|
||||
gc3 = gc2 + 1;
|
||||
if ( cell.z+1 < simData.res.z ) {
|
||||
gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
|
||||
gc5 = gc4 + 1;
|
||||
gc6 = gc4 + simData.res.x;
|
||||
gc7 = gc6 + 1;
|
||||
}
|
||||
if ( cell.x+1 >= simData.res.x ) {
|
||||
gc1 = -1; gc3 = -1;
|
||||
gc5 = -1; gc7 = -1;
|
||||
}
|
||||
if ( cell.y+1 >= simData.res.y ) {
|
||||
gc2 = -1; gc3 = -1;
|
||||
gc6 = -1; gc7 = -1;
|
||||
}
|
||||
// Sum Pressure
|
||||
float3 force = make_float3(0,0,0);
|
||||
if (gc0 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
|
||||
if (gc1 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
|
||||
if (gc2 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
|
||||
if (gc3 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
|
||||
if (gc4 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
|
||||
if (gc5 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
|
||||
if (gc6 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
|
||||
if (gc7 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );
|
||||
|
||||
// Update Force
|
||||
*(float3*) ((char*)pos + OFFSET_FORCE ) = force;
|
||||
|
||||
//}
|
||||
//__syncthreads ();
|
||||
}
|
||||
|
||||
|
||||
__global__ void computeForceNbr ( char* bufPntSort, int numPnt )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
|
||||
if ( ndx < numPnt ) {
|
||||
|
||||
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
||||
|
||||
float3* qpos;
|
||||
float press = *(float*) ((char*)pos + OFFSET_PRESS);
|
||||
float dens = *(float*) ((char*)pos + OFFSET_DENS);
|
||||
float3 veval = *(float3*) ((char*)pos + OFFSET_VEVAL );
|
||||
float3 qeval, dist, force;
|
||||
float d = simData.sim_scale;
|
||||
float c, ndistj;
|
||||
float pterm, dterm, vterm;
|
||||
vterm = simData.lapkern * simData.visc;
|
||||
int nbr = __mul24(ndx, MAX_NBR);
|
||||
|
||||
int ncnt = bufNeighbor[ nbr ];
|
||||
|
||||
force = make_float3(0,0,0);
|
||||
for (int j=1; j < ncnt; j++) { // base 1, n[0] = count
|
||||
ndistj = bufNdist[ nbr+j ];
|
||||
qpos = (float3*) (bufPntSort + __mul24( bufNeighbor[ nbr+j ], simData.stride) );
|
||||
c = ( simData.smooth_rad - ndistj );
|
||||
dist.x = ( pos->x - qpos->x )*d; // dist in cm
|
||||
dist.y = ( pos->y - qpos->y )*d;
|
||||
dist.z = ( pos->z - qpos->z )*d;
|
||||
pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
|
||||
dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
|
||||
qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
|
||||
force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
|
||||
force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
|
||||
force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
|
||||
}
|
||||
*(float3*) ((char*)pos + OFFSET_FORCE ) = force;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
__global__ void advanceParticles ( char* bufPntSort, int numPnt, float dt, float ss )
|
||||
{
|
||||
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
|
||||
|
||||
if ( ndx < numPnt ) {
|
||||
|
||||
// Get particle vars
|
||||
float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
|
||||
float3* vel = (float3*) ((char*)pos + OFFSET_VEL );
|
||||
float3* vel_eval = (float3*) ((char*)pos + OFFSET_VEVAL );
|
||||
float3 accel = *(float3*) ((char*)pos + OFFSET_FORCE );
|
||||
float3 vcurr, vnext;
|
||||
|
||||
// Leapfrog integration
|
||||
accel.x *= 0.00020543; // NOTE - To do: SPH_PMASS should be passed in
|
||||
accel.y *= 0.00020543;
|
||||
accel.z *= 0.00020543;
|
||||
accel.z -= 9.8;
|
||||
|
||||
vcurr = *vel;
|
||||
vnext.x = accel.x*dt + vcurr.x;
|
||||
vnext.y = accel.y*dt + vcurr.y;
|
||||
vnext.z = accel.z*dt + vcurr.z; // v(t+1/2) = v(t-1/2) + a(t) dt
|
||||
|
||||
accel.x = (vcurr.x + vnext.x) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
|
||||
accel.y = (vcurr.y + vnext.y) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
|
||||
accel.z = (vcurr.z + vnext.z) * 0.5; // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5 used to compute forces later
|
||||
|
||||
*vel_eval = accel;
|
||||
*vel = vnext;
|
||||
|
||||
dt /= simData.sim_scale;
|
||||
vnext.x = pos->x + vnext.x*dt;
|
||||
vnext.y = pos->y + vnext.y*dt;
|
||||
vnext.z = pos->z + vnext.z*dt;
|
||||
*pos = vnext; // p(t+1) = p(t) + v(t+1/2) dt
|
||||
}
|
||||
|
||||
__syncthreads ();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user