fix: some file didn't have the svn:eol-style native yet

This commit is contained in:
erwin.coumans
2010-03-06 15:23:36 +00:00
parent 4fd48ac691
commit 81f04a4d48
641 changed files with 301123 additions and 301123 deletions

View File

@@ -1,44 +1,44 @@
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef DEF_FLUID
#define DEF_FLUID
#include "vector.h"
#include "common_defs.h"
// Per-particle record used by the SPH fluid simulator.
// NOTE(review): the leading members are commented as having to match the Particle
// class (not visible here) -- do not reorder or resize fields without checking it.
struct Fluid {
public:
Vector3DF pos; // Basic particle (must match Particle class)
DWORD clr; // presumably a packed color value -- TODO confirm
int next; // presumably an index link to another particle -- TODO confirm
Vector3DF vel;
Vector3DF vel_eval;
unsigned short age;
float pressure; // Smoothed Particle Hydrodynamics
float density;
Vector3DF sph_force;
};
#endif /*DEF_FLUID*/
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef DEF_FLUID
#define DEF_FLUID
#include "vector.h"
#include "common_defs.h"
// Per-particle record used by the SPH fluid simulator.
// NOTE(review): the leading members are commented as having to match the Particle
// class (not visible here) -- do not reorder or resize fields without checking it.
struct Fluid {
public:
Vector3DF pos; // Basic particle (must match Particle class)
DWORD clr; // presumably a packed color value -- TODO confirm
int next; // presumably an index link to another particle -- TODO confirm
Vector3DF vel;
Vector3DF vel_eval;
unsigned short age;
float pressure; // Smoothed Particle Hydrodynamics
float density;
Vector3DF sph_force;
};
#endif /*DEF_FLUID*/

File diff suppressed because it is too large Load Diff

View File

@@ -1,71 +1,71 @@
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2009. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cutil.h>
#include <cstdlib>
#include <cstdio>
#include <string.h>
#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif
#include <cuda_gl_interop.h>
#include "fluid_system_kern.cu"
extern "C"
{
// Block-count helper: a / b, plus one when the division leaves a remainder.
// For non-negative a and positive b this is the ceiling of a / b.
// b must not be zero.
int iDivUp (int a, int b) {
    int quotient = a / b;
    if (a % b != 0) {
        quotient += 1;
    }
    return quotient;
}
// Choose a 1D launch configuration that covers numPnts work items.
//   numPnts    - number of items to cover
//   minThreads - upper bound on the threads per block (despite the name, it is
//                used as the first argument of min())
//   numBlocks  - [out] number of blocks
//   numThreads - [out] threads per block, min(minThreads, numPnts)
// If numPnts or minThreads is not positive there is nothing valid to launch:
// both outputs are set to 0 instead of dividing by zero inside iDivUp.
void computeNumBlocks (int numPnts, int minThreads, int &numBlocks, int &numThreads)
{
    if ( numPnts <= 0 || minThreads <= 0 ) {
        numBlocks = 0;
        numThreads = 0;
        return;
    }
    numThreads = min( minThreads, numPnts );
    numBlocks = iDivUp ( numPnts, numThreads );
}
// Per-call variant of grid insertion: uploads the point data, runs insertParticles,
// and copies the data back to the host.
//   data      - host buffer of point records
//   stride    - bytes per point record
//   numPoints - number of point records
// NOTE(review): this definition is not closed in the visible text and does not
// compile as written -- see the notes on individual lines below.
void Grid_InsertParticlesCUDA ( uchar* data, uint stride, uint numPoints )
{
int numThreads, numBlocks;
computeNumBlocks (numPoints, 256, numBlocks, numThreads);
// transfer point data to device
char* pntData;
// NOTE(review): 'size' is never declared in this function.
size = numPoints * stride;
// NOTE(review): the results of cudaMalloc/cudaMemcpy are not checked, and pntData is never freed.
cudaMalloc( (void**) &pntData, size);
cudaMemcpy( pntData, data, size, cudaMemcpyHostToDevice);
// execute the kernel
insertParticles<<< numBlocks, numThreads >>> ( pntData, stride );
// transfer data back to host
// NOTE(review): the byte-count argument is missing from this cudaMemcpy call.
cudaMemcpy( data, pntData, cudaMemcpyDeviceToHost);
// check if kernel invocation generated an error
CUT_CHECK_ERROR("Kernel execution failed");
// NOTE(review): vboPos is not declared in the visible code and no matching map call appears here -- confirm.
CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos));
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2009. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cutil.h>
#include <cstdlib>
#include <cstdio>
#include <string.h>
#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif
#include <cuda_gl_interop.h>
#include "fluid_system_kern.cu"
extern "C"
{
// Block-count helper: a / b, plus one when the division leaves a remainder.
// For non-negative a and positive b this is the ceiling of a / b.
// b must not be zero.
int iDivUp (int a, int b) {
    int quotient = a / b;
    if (a % b != 0) {
        quotient += 1;
    }
    return quotient;
}
// Choose a 1D launch configuration that covers numPnts work items.
//   numPnts    - number of items to cover
//   minThreads - upper bound on the threads per block (despite the name, it is
//                used as the first argument of min())
//   numBlocks  - [out] number of blocks
//   numThreads - [out] threads per block, min(minThreads, numPnts)
// If numPnts or minThreads is not positive there is nothing valid to launch:
// both outputs are set to 0 instead of dividing by zero inside iDivUp.
void computeNumBlocks (int numPnts, int minThreads, int &numBlocks, int &numThreads)
{
    if ( numPnts <= 0 || minThreads <= 0 ) {
        numBlocks = 0;
        numThreads = 0;
        return;
    }
    numThreads = min( minThreads, numPnts );
    numBlocks = iDivUp ( numPnts, numThreads );
}
// Per-call variant of grid insertion: uploads the point data to a temporary device
// buffer, runs insertParticles over it, and copies the result back to the host.
//   data      - host buffer of numPoints records, overwritten with the kernel's output
//   stride    - bytes per point record
//   numPoints - number of point records
// Fixes over the previous text: 'size' is now declared, the copy-back passes its
// byte count, the temporary device buffer is released, and every runtime call is checked.
void Grid_InsertParticlesCUDA ( uchar* data, uint stride, uint numPoints )
{
    int numThreads, numBlocks;
    computeNumBlocks (numPoints, 256, numBlocks, numThreads);

    // transfer point data to device
    char* pntData = NULL;
    // widen before multiplying so the byte count cannot wrap in 32 bits
    size_t size = (size_t) numPoints * stride;
    CUDA_SAFE_CALL( cudaMalloc( (void**) &pntData, size) );
    CUDA_SAFE_CALL( cudaMemcpy( pntData, data, size, cudaMemcpyHostToDevice) );

    // execute the kernel
    insertParticles<<< numBlocks, numThreads >>> ( pntData, stride );
    // check if kernel invocation generated an error
    CUT_CHECK_ERROR("Kernel execution failed");

    // transfer data back to host, then release the per-call buffer
    CUDA_SAFE_CALL( cudaMemcpy( data, pntData, size, cudaMemcpyDeviceToHost) );
    CUDA_SAFE_CALL( cudaFree( pntData ) );

    // NOTE(review): vboPos is not declared in the visible code and no matching
    // map call appears in this function -- confirm this unmap belongs here.
    CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos));
}

View File

@@ -1,106 +1,106 @@
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef DEF_FLUID_SYS
#define DEF_FLUID_SYS
#include <iostream>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "point_set.h"
#include "fluid.h"
// Parameter identifiers used with FluidSystem.
// NOTE(review): the scalar, vector and toggle groups reuse the same small integers
// (e.g. SPH_RESTDENSITY and SPH_VOLMIN are both 7), so they presumably index
// separate tables in PointSet -- confirm against point_set.h.
// Scalar params
#define SPH_SIMSIZE 4
#define SPH_SIMSCALE 5
#define SPH_VISC 6
#define SPH_RESTDENSITY 7
#define SPH_PMASS 8
#define SPH_PRADIUS 9
#define SPH_PDIST 10
#define SPH_SMOOTHRADIUS 11
#define SPH_INTSTIFF 12
#define SPH_EXTSTIFF 13
#define SPH_EXTDAMP 14
#define SPH_LIMIT 15
#define BOUND_ZMIN_SLOPE 16
#define FORCE_XMAX_SIN 17
#define FORCE_XMIN_SIN 18
#define MAX_FRAC 19
#define CLR_MODE 20
// Vector params
#define SPH_VOLMIN 7
#define SPH_VOLMAX 8
#define SPH_INITMIN 9
#define SPH_INITMAX 10
// Toggles
#define SPH_GRID 0
#define SPH_DEBUG 1
#define WRAP_X 2
#define WALL_BARRIER 3
#define LEVY_BARRIER 4
#define DRAIN_BARRIER 5
#define USE_CUDA 6
// One past the largest scalar id above (CLR_MODE is 20).
#define MAX_PARAM 21
// NOTE(review): the meaning of BFLUID is not evident from this header -- confirm at its use sites.
#define BFLUID 2
// Particle system specialised for Smoothed Particle Hydrodynamics fluids.
// Derives from PointSet (point_set.h); the member functions declared here are
// defined outside this header.
class FluidSystem : public PointSet {
public:
FluidSystem ();
// Basic Particle System
virtual void Initialize ( int mode, int nmax );
virtual void Reset ( int nmax );
virtual void Run ();
virtual void Advance ();
virtual int AddPoint ();
virtual int AddPointReuse ();
// Calls AddPointReuse() and returns GetElem(0, <that index>) viewed as a Fluid record.
Fluid* AddFluid () { return (Fluid*) GetElem(0, AddPointReuse()); }
// Returns GetElem(0, n) viewed as a Fluid record; n is not range-checked here.
Fluid* GetFluid (int n) { return (Fluid*) GetElem(0, n); }
// Smoothed Particle Hydrodynamics
void SPH_Setup ();
void SPH_CreateExample ( int n, int nmax );
void SPH_DrawDomain ();
void SPH_ComputeKernels ();
void SPH_ComputePressureSlow (); // O(n^2)
void SPH_ComputePressureGrid (); // O(kn) - spatial grid
void SPH_ComputeForceSlow (); // O(n^2)
void SPH_ComputeForceGrid (); // O(kn) - spatial grid
void SPH_ComputeForceGridNC (); // O(cn) - neighbor table
private:
// Smoothed Particle Hydrodynamics
// presumably filled in by SPH_ComputeKernels() -- TODO confirm
double m_R2, m_Poly6Kern, m_LapKern, m_SpikyKern; // Kernel functions
};
#endif
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef DEF_FLUID_SYS
#define DEF_FLUID_SYS
#include <iostream>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "point_set.h"
#include "fluid.h"
// Parameter identifiers used with FluidSystem.
// NOTE(review): the scalar, vector and toggle groups reuse the same small integers
// (e.g. SPH_RESTDENSITY and SPH_VOLMIN are both 7), so they presumably index
// separate tables in PointSet -- confirm against point_set.h.
// Scalar params
#define SPH_SIMSIZE 4
#define SPH_SIMSCALE 5
#define SPH_VISC 6
#define SPH_RESTDENSITY 7
#define SPH_PMASS 8
#define SPH_PRADIUS 9
#define SPH_PDIST 10
#define SPH_SMOOTHRADIUS 11
#define SPH_INTSTIFF 12
#define SPH_EXTSTIFF 13
#define SPH_EXTDAMP 14
#define SPH_LIMIT 15
#define BOUND_ZMIN_SLOPE 16
#define FORCE_XMAX_SIN 17
#define FORCE_XMIN_SIN 18
#define MAX_FRAC 19
#define CLR_MODE 20
// Vector params
#define SPH_VOLMIN 7
#define SPH_VOLMAX 8
#define SPH_INITMIN 9
#define SPH_INITMAX 10
// Toggles
#define SPH_GRID 0
#define SPH_DEBUG 1
#define WRAP_X 2
#define WALL_BARRIER 3
#define LEVY_BARRIER 4
#define DRAIN_BARRIER 5
#define USE_CUDA 6
// One past the largest scalar id above (CLR_MODE is 20).
#define MAX_PARAM 21
// NOTE(review): the meaning of BFLUID is not evident from this header -- confirm at its use sites.
#define BFLUID 2
// Particle system specialised for Smoothed Particle Hydrodynamics fluids.
// Derives from PointSet (point_set.h); the member functions declared here are
// defined outside this header.
class FluidSystem : public PointSet {
public:
FluidSystem ();
// Basic Particle System
virtual void Initialize ( int mode, int nmax );
virtual void Reset ( int nmax );
virtual void Run ();
virtual void Advance ();
virtual int AddPoint ();
virtual int AddPointReuse ();
// Calls AddPointReuse() and returns GetElem(0, <that index>) viewed as a Fluid record.
Fluid* AddFluid () { return (Fluid*) GetElem(0, AddPointReuse()); }
// Returns GetElem(0, n) viewed as a Fluid record; n is not range-checked here.
Fluid* GetFluid (int n) { return (Fluid*) GetElem(0, n); }
// Smoothed Particle Hydrodynamics
void SPH_Setup ();
void SPH_CreateExample ( int n, int nmax );
void SPH_DrawDomain ();
void SPH_ComputeKernels ();
void SPH_ComputePressureSlow (); // O(n^2)
void SPH_ComputePressureGrid (); // O(kn) - spatial grid
void SPH_ComputeForceSlow (); // O(n^2)
void SPH_ComputeForceGrid (); // O(kn) - spatial grid
void SPH_ComputeForceGridNC (); // O(cn) - neighbor table
private:
// Smoothed Particle Hydrodynamics
// presumably filled in by SPH_ComputeKernels() -- TODO confirm
double m_R2, m_Poly6Kern, m_LapKern, m_SpikyKern; // Kernel functions
};
#endif

View File

@@ -1,250 +1,250 @@
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//#include "C:\CUDA\common\inc\cutil.h" // cutil32.lib
#include <string.h>
#include "../CUDA/btCudaDefines.h"
#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif
#include <cuda_gl_interop.h>
#include "radixsort.cu"
#include "fluid_system_kern.cu" // build kernel
// Host-side copy of the simulation parameters; FluidSetupCUDA and FluidParamCUDA
// upload it to the simData symbol with cudaMemcpyToSymbol.
FluidParams fcuda;
// Device buffers. They are allocated with cudaMalloc, released with cudaFree and
// passed to kernels as launch arguments by the host functions in this file.
// NOTE(review): these are declared __device__ yet written (cudaMalloc(&buf...)) and
// read directly from host code; current CUDA toolchains do not support direct host
// access to __device__ variables. Plain host-side pointers would suffice if the
// kernel file never references these names -- confirm before changing.
__device__ char* bufPnts; // point data (array of Fluid structs)
__device__ char* bufPntSort; // point data (array of Fluid structs)
__device__ uint* bufHash[2]; // point grid hash
__device__ int* bufGrid;
extern "C"
{
// Initialize CUDA: print the properties of device 0 and create small placeholder
// allocations for the point, sorted-point, hash and grid buffers.
//   argc, argv - unused (the CUT_DEVICE_INIT call that took them is commented out)
void cudaInit(int argc, char **argv)
{
    //CUT_DEVICE_INIT(argc, argv);
    cudaDeviceProp p;
    // Checked like the other runtime calls, so a failed query is not printed as garbage.
    BT_GPU_SAFE_CALL ( cudaGetDeviceProperties ( &p, 0) );

    printf ( "-- CUDA --\n" );
    printf ( "Name: %s\n", p.name );
    printf ( "Revision: %d.%d\n", p.major, p.minor );
    // The memory-size fields are size_t; passing them to %d is undefined where
    // size_t is wider than int, so they are widened explicitly and printed as %llu.
    printf ( "Global Mem: %llu\n", (unsigned long long) p.totalGlobalMem );
    printf ( "Shared/Blk: %llu\n", (unsigned long long) p.sharedMemPerBlock );
    printf ( "Regs/Blk: %d\n", p.regsPerBlock );
    printf ( "Warp Size: %d\n", p.warpSize );
    printf ( "Mem Pitch: %llu\n", (unsigned long long) p.memPitch );
    printf ( "Thrds/Blk: %d\n", p.maxThreadsPerBlock );
    printf ( "Const Mem: %llu\n", (unsigned long long) p.totalConstMem );
    printf ( "Clock Rate: %d\n", p.clockRate );

    // 10-byte placeholder allocations; presumably so that a later FluidClearCUDA
    // has valid pointers to free -- confirm against the caller.
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPnts, 10 ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPntSort, 10 ) );
    // &bufHash[0] is the same address the original passed as &bufHash, spelled so
    // that it is clear which element receives the allocation. bufHash[1] is not
    // allocated here.
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufHash[0], 10 ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufGrid, 10 ) );
}
// Block-count helper: a / b, plus one when the division leaves a remainder.
// For non-negative a and positive b this is the ceiling of a / b.
// b must not be zero.
int iDivUp (int a, int b) {
    int quotient = a / b;
    if (a % b != 0) {
        quotient += 1;
    }
    return quotient;
}
// Choose a 1D launch configuration that covers numPnts work items.
//   numPnts    - number of items to cover
//   maxThreads - upper bound on the threads per block
//   numBlocks  - [out] number of blocks
//   numThreads - [out] threads per block, min(maxThreads, numPnts)
// If numPnts or maxThreads is not positive there is nothing valid to launch:
// both outputs are set to 0 instead of dividing by zero inside iDivUp.
void computeNumBlocks (int numPnts, int maxThreads, int &numBlocks, int &numThreads)
{
    if ( numPnts <= 0 || maxThreads <= 0 ) {
        numBlocks = 0;
        numThreads = 0;
        return;
    }
    numThreads = min( maxThreads, numPnts );
    numBlocks = iDivUp ( numPnts, numThreads );
}
// Release the five device buffers owned by this file, in the order
// bufPnts, bufPntSort, bufHash[0], bufHash[1], bufGrid.
// The pointers themselves are left unchanged after the free.
void FluidClearCUDA ()
{
    void* buffers[] = { bufPnts, bufPntSort, bufHash[0], bufHash[1], bufGrid };
    const int count = sizeof(buffers) / sizeof(buffers[0]);
    for (int i = 0; i < count; ++i) {
        BT_GPU_SAFE_CALL ( cudaFree ( buffers[i] ) );
    }
}
// Record the domain description in fcuda, derive launch configurations and buffer
// sizes, allocate the device buffers and upload fcuda to the simData symbol.
//   num    - number of particles
//   stride - bytes per particle record
//   min, max, res, size - stored as given; delta is res / size per component and
//                         the cell count is res.x * res.y * res.z
//   chk    - stored in fcuda.chk
// NOTE(review): the previous buffer pointers are overwritten without being freed;
// presumably callers run FluidClearCUDA first -- verify.
void FluidSetupCUDA ( int num, int stride, float3 min, float3 max, float3 res, float3 size, int chk )
{
    // Domain description
    fcuda.min = min;
    fcuda.max = max;
    fcuda.res = res;
    fcuda.size = size;
    fcuda.pnts = num;
    fcuda.chk = chk;
    fcuda.stride = stride;
    fcuda.delta = make_float3( res.x / size.x, res.y / size.y, res.z / size.z );
    fcuda.cells = res.x*res.y*res.z;

    // Launch configurations: one for particles, one for grid cells
    computeNumBlocks ( fcuda.pnts, 256, fcuda.numBlocks, fcuda.numThreads);
    computeNumBlocks ( fcuda.cells, 256, fcuda.gridBlocks, fcuda.gridThreads);

    // Buffers are sized for every launched thread, not just for pnts / cells
    const int pntSlots = fcuda.numBlocks * fcuda.numThreads;
    const int gridSlots = fcuda.gridBlocks * fcuda.gridThreads;
    fcuda.szPnts = pntSlots * stride;
    fcuda.szHash = pntSlots * sizeof(uint2); // <cell, particle> pairs
    fcuda.szGrid = gridSlots * sizeof(uint);

    printf ( "pnts: %d, t:%dx%d=%d, bufPnts:%d, bufHash:%d\n", fcuda.pnts, fcuda.numBlocks, fcuda.numThreads, pntSlots, fcuda.szPnts, fcuda.szHash );
    printf ( "grds: %d, t:%dx%d=%d, bufGrid:%d, Res: %dx%dx%d\n", fcuda.cells, fcuda.gridBlocks, fcuda.gridThreads, gridSlots, fcuda.szGrid, (int) fcuda.res.x, (int) fcuda.res.y, (int) fcuda.res.z );

    // Device allocations
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPnts, fcuda.szPnts ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPntSort, fcuda.szPnts ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufHash[0], fcuda.szHash ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufHash[1], fcuda.szHash ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufGrid, fcuda.szGrid ) );

    printf ( "POINTERS\n");
    printf ( "bufPnts: %p\n", bufPnts );
    printf ( "bufPntSort: %p\n", bufPntSort );
    printf ( "bufHash0: %p\n", bufHash[0] );
    printf ( "bufHash1: %p\n", bufHash[1] );
    printf ( "bufGrid: %p\n", bufGrid );

    // Publish the parameters to the device
    BT_GPU_SAFE_CALL ( cudaMemcpyToSymbol ( simData, &fcuda, sizeof(FluidParams) ) );
    cudaThreadSynchronize ();
}
// Store the SPH material parameters in fcuda, derive the dependent constants
// and upload fcuda to the simData symbol.
//   sim_scale  - stored in fcuda.sim_scale
//   smooth_rad - smoothing radius; r2 is its square
//   mass, rest - particle mass and rest density; pdist = (mass / rest)^(1/3)
//   stiff, visc - stored in fcuda.stiffness and fcuda.visc
// Kernel constants: poly6kern = 315 / (64 pi h^9), spikykern = -45 / (pi h^6),
// lapkern = 45 / (pi h^6), with h = smooth_rad and pi taken as 3.141592.
void FluidParamCUDA ( float sim_scale, float smooth_rad, float mass, float rest, float stiff, float visc )
{
    fcuda.sim_scale = sim_scale;
    fcuda.smooth_rad = smooth_rad;
    fcuda.r2 = smooth_rad * smooth_rad;
    fcuda.pmass = mass;
    fcuda.rest_dens = rest;
    fcuda.stiffness = stiff;
    fcuda.visc = visc;
    fcuda.pdist = pow ( fcuda.pmass / fcuda.rest_dens, 1/3.0f );

    // The denominators are evaluated in double, as in the inline expressions
    const double h9 = pow( smooth_rad, 9.0f);
    const double h6 = pow( smooth_rad, 6.0f);
    const double piH6 = 3.141592 * h6;
    fcuda.poly6kern = 315.0f / (64.0f * 3.141592 * h9 );
    fcuda.spikykern = -45.0f / piH6;
    fcuda.lapkern = 45.0f / piH6;

    BT_GPU_SAFE_CALL( cudaMemcpyToSymbol ( simData, &fcuda, sizeof(FluidParams) ) );
    cudaThreadSynchronize ();
}
// Upload particle records from the host into bufPnts.
//   data      - host buffer holding numPoints records of fcuda.stride bytes each
//   grid      - unused
//   numPoints - number of records to copy
void TransferToCUDA ( char* data, int* grid, int numPoints )
{
    const size_t numBytes = numPoints * fcuda.stride;
    BT_GPU_SAFE_CALL( cudaMemcpy ( bufPnts, data, numBytes, cudaMemcpyHostToDevice ) );
    cudaThreadSynchronize ();
}
// Download results from the device.
//   data      - host buffer receiving numPoints records of fcuda.stride bytes each,
//               read from bufPntSort (not bufPnts)
//   grid      - host buffer receiving fcuda.cells entries of bufGrid
//   numPoints - number of particle records to copy
void TransferFromCUDA ( char* data, int* grid, int numPoints )
{
    const size_t pointBytes = numPoints * fcuda.stride;
    const size_t gridBytes = fcuda.cells * sizeof(uint);

    BT_GPU_SAFE_CALL( cudaMemcpy ( data, bufPntSort, pointBytes, cudaMemcpyDeviceToHost ) );
    cudaThreadSynchronize ();
    BT_GPU_SAFE_CALL( cudaMemcpy ( grid, bufGrid, gridBytes, cudaMemcpyDeviceToHost ) );
}
void Grid_InsertParticlesCUDA ()
{
BT_GPU_SAFE_CALL( cudaMemset ( bufHash[0], 0, fcuda.szHash ) );
hashParticles<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPnts, (uint2*) bufHash[0], fcuda.pnts );
BT_GPU_CHECK_ERROR( "Kernel execution failed");
cudaThreadSynchronize ();
//int buf[20000];
/*printf ( "HASH: %d (%d)\n", fcuda.pnts, fcuda.numBlocks*fcuda.numThreads );
BT_GPU_SAFE_CALL( cudaMemcpy ( buf, bufHash[0], fcuda.pnts * 2*sizeof(uint), cudaMemcpyDeviceToHost ) );
//for (int n=0; n < fcuda.numBlocks*fcuda.numThreads; n++) {
for (int n=0; n < 100; n++) {
printf ( "%d: <%d,%d>\n", n, buf[n*2], buf[n*2+1] );
}*/
RadixSort( (KeyValuePair *) bufHash[0], (KeyValuePair *) bufHash[1], fcuda.pnts, 32);
BT_GPU_CHECK_ERROR( "Kernel execution failed");
cudaThreadSynchronize ();
/*printf ( "HASH: %d (%d)\n", fcuda.pnts, fcuda.numBlocks*fcuda.numThreads );
BT_GPU_SAFE_CALL( cudaMemcpy ( buf, bufHash[0], fcuda.pnts * 2*sizeof(uint), cudaMemcpyDeviceToHost ) );
//for (int n=0; n < fcuda.numBlocks*fcuda.numThreads; n++) {
for (int n=0; n < 100; n++) {
printf ( "%d: <%d,%d>\n", n, buf[n*2], buf[n*2+1] );
}*/
// insertParticles<<< fcuda.gridBlocks, fcuda.gridThreads>>> ( bufPnts, (uint2*) bufHash[0], bufGrid, fcuda.pnts, fcuda.cells );
BT_GPU_SAFE_CALL( cudaMemset ( bufGrid, NULL_HASH, fcuda.cells * sizeof(uint) ) );
insertParticlesRadix<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPnts, (uint2*) bufHash[0], bufGrid, bufPntSort, fcuda.pnts, fcuda.cells );
BT_GPU_CHECK_ERROR( "Kernel execution failed");
cudaThreadSynchronize ();
/*printf ( "GRID: %d\n", fcuda.cells );
BT_GPU_SAFE_CALL( cudaMemcpy ( buf, bufGrid, fcuda.cells * sizeof(uint), cudaMemcpyDeviceToHost ) );
*for (int n=0; n < 100; n++) {
printf ( "%d: %d\n", n, buf[n]);
}*/
}
void SPH_ComputePressureCUDA ()
{
computePressure<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPntSort, bufGrid, (uint2*) bufHash[0], fcuda.pnts );
BT_GPU_CHECK_ERROR( "Kernel execution failed");
cudaThreadSynchronize ();
}
// Launch the force kernel over bufPntSort using the particle launch configuration
// stored in fcuda, then check for errors and wait for the device.
// The neighbor-table kernel (computeForceNbr) is the one in use; the grid-based
// launch is kept below for reference:
//   computeForce<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPntSort, bufGrid, (uint2*) bufHash[0], fcuda.pnts );
void SPH_ComputeForceCUDA ()
{
    const int blocks = fcuda.numBlocks;
    const int threads = fcuda.numThreads;
    computeForceNbr<<< blocks, threads >>> ( bufPntSort, fcuda.pnts );
    BT_GPU_CHECK_ERROR( "Kernel execution failed");
    cudaThreadSynchronize ();
}
// Launch advanceParticles over bufPntSort, then check for errors and wait for the device.
//   dt - forwarded to the kernel (presumably the time step -- confirm)
//   ss - forwarded to the kernel (presumably the simulation scale -- confirm)
void SPH_AdvanceCUDA ( float dt, float ss )
{
    const int blocks = fcuda.numBlocks;
    const int threads = fcuda.numThreads;
    advanceParticles<<< blocks, threads >>> ( bufPntSort, fcuda.pnts, dt, ss );
    BT_GPU_CHECK_ERROR( "Kernel execution failed");
    cudaThreadSynchronize ();
}
} // extern C
//----------- Per frame: Malloc/Free, Host<->Device
// transfer point data to device
/*char* pntData;
int size = (fcuda.numBlocks*fcuda.numThreads) * stride;
cudaMalloc( (void**) &pntData, size);
cudaMemcpy( pntData, data, numPoints*stride, cudaMemcpyHostToDevice);
insertParticles<<< fcuda.numBlocks, fcuda.numThreads >>> ( pntData, stride, numPoints );
cudaMemcpy( data, pntData, numPoints*stride, cudaMemcpyDeviceToHost);
cudaFree( pntData );*/
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//#include "C:\CUDA\common\inc\cutil.h" // cutil32.lib
#include <string.h>
#include "../CUDA/btCudaDefines.h"
#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif
#include <cuda_gl_interop.h>
#include "radixsort.cu"
#include "fluid_system_kern.cu" // build kernel
// Host-side copy of the simulation parameters; FluidSetupCUDA and FluidParamCUDA
// upload it to the simData symbol with cudaMemcpyToSymbol.
FluidParams fcuda;
// Device buffers. They are allocated with cudaMalloc, released with cudaFree and
// passed to kernels as launch arguments by the host functions in this file.
// NOTE(review): these are declared __device__ yet written (cudaMalloc(&buf...)) and
// read directly from host code; current CUDA toolchains do not support direct host
// access to __device__ variables. Plain host-side pointers would suffice if the
// kernel file never references these names -- confirm before changing.
__device__ char* bufPnts; // point data (array of Fluid structs)
__device__ char* bufPntSort; // point data (array of Fluid structs)
__device__ uint* bufHash[2]; // point grid hash
__device__ int* bufGrid;
extern "C"
{
// Initialize CUDA: print the properties of device 0 and create small placeholder
// allocations for the point, sorted-point, hash and grid buffers.
//   argc, argv - unused (the CUT_DEVICE_INIT call that took them is commented out)
void cudaInit(int argc, char **argv)
{
    //CUT_DEVICE_INIT(argc, argv);
    cudaDeviceProp p;
    // Checked like the other runtime calls, so a failed query is not printed as garbage.
    BT_GPU_SAFE_CALL ( cudaGetDeviceProperties ( &p, 0) );

    printf ( "-- CUDA --\n" );
    printf ( "Name: %s\n", p.name );
    printf ( "Revision: %d.%d\n", p.major, p.minor );
    // The memory-size fields are size_t; passing them to %d is undefined where
    // size_t is wider than int, so they are widened explicitly and printed as %llu.
    printf ( "Global Mem: %llu\n", (unsigned long long) p.totalGlobalMem );
    printf ( "Shared/Blk: %llu\n", (unsigned long long) p.sharedMemPerBlock );
    printf ( "Regs/Blk: %d\n", p.regsPerBlock );
    printf ( "Warp Size: %d\n", p.warpSize );
    printf ( "Mem Pitch: %llu\n", (unsigned long long) p.memPitch );
    printf ( "Thrds/Blk: %d\n", p.maxThreadsPerBlock );
    printf ( "Const Mem: %llu\n", (unsigned long long) p.totalConstMem );
    printf ( "Clock Rate: %d\n", p.clockRate );

    // 10-byte placeholder allocations; presumably so that a later FluidClearCUDA
    // has valid pointers to free -- confirm against the caller.
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPnts, 10 ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPntSort, 10 ) );
    // &bufHash[0] is the same address the original passed as &bufHash, spelled so
    // that it is clear which element receives the allocation. bufHash[1] is not
    // allocated here.
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufHash[0], 10 ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufGrid, 10 ) );
}
// Block-count helper: a / b, plus one when the division leaves a remainder.
// For non-negative a and positive b this is the ceiling of a / b.
// b must not be zero.
int iDivUp (int a, int b) {
    int quotient = a / b;
    if (a % b != 0) {
        quotient += 1;
    }
    return quotient;
}
// Choose a 1D launch configuration that covers numPnts work items.
//   numPnts    - number of items to cover
//   maxThreads - upper bound on the threads per block
//   numBlocks  - [out] number of blocks
//   numThreads - [out] threads per block, min(maxThreads, numPnts)
// If numPnts or maxThreads is not positive there is nothing valid to launch:
// both outputs are set to 0 instead of dividing by zero inside iDivUp.
void computeNumBlocks (int numPnts, int maxThreads, int &numBlocks, int &numThreads)
{
    if ( numPnts <= 0 || maxThreads <= 0 ) {
        numBlocks = 0;
        numThreads = 0;
        return;
    }
    numThreads = min( maxThreads, numPnts );
    numBlocks = iDivUp ( numPnts, numThreads );
}
// Release the five device buffers owned by this file, in the order
// bufPnts, bufPntSort, bufHash[0], bufHash[1], bufGrid.
// The pointers themselves are left unchanged after the free.
void FluidClearCUDA ()
{
    void* buffers[] = { bufPnts, bufPntSort, bufHash[0], bufHash[1], bufGrid };
    const int count = sizeof(buffers) / sizeof(buffers[0]);
    for (int i = 0; i < count; ++i) {
        BT_GPU_SAFE_CALL ( cudaFree ( buffers[i] ) );
    }
}
// Record the domain description in fcuda, derive launch configurations and buffer
// sizes, allocate the device buffers and upload fcuda to the simData symbol.
//   num    - number of particles
//   stride - bytes per particle record
//   min, max, res, size - stored as given; delta is res / size per component and
//                         the cell count is res.x * res.y * res.z
//   chk    - stored in fcuda.chk
// NOTE(review): the previous buffer pointers are overwritten without being freed;
// presumably callers run FluidClearCUDA first -- verify.
void FluidSetupCUDA ( int num, int stride, float3 min, float3 max, float3 res, float3 size, int chk )
{
    // Domain description
    fcuda.min = min;
    fcuda.max = max;
    fcuda.res = res;
    fcuda.size = size;
    fcuda.pnts = num;
    fcuda.chk = chk;
    fcuda.stride = stride;
    fcuda.delta = make_float3( res.x / size.x, res.y / size.y, res.z / size.z );
    fcuda.cells = res.x*res.y*res.z;

    // Launch configurations: one for particles, one for grid cells
    computeNumBlocks ( fcuda.pnts, 256, fcuda.numBlocks, fcuda.numThreads);
    computeNumBlocks ( fcuda.cells, 256, fcuda.gridBlocks, fcuda.gridThreads);

    // Buffers are sized for every launched thread, not just for pnts / cells
    const int pntSlots = fcuda.numBlocks * fcuda.numThreads;
    const int gridSlots = fcuda.gridBlocks * fcuda.gridThreads;
    fcuda.szPnts = pntSlots * stride;
    fcuda.szHash = pntSlots * sizeof(uint2); // <cell, particle> pairs
    fcuda.szGrid = gridSlots * sizeof(uint);

    printf ( "pnts: %d, t:%dx%d=%d, bufPnts:%d, bufHash:%d\n", fcuda.pnts, fcuda.numBlocks, fcuda.numThreads, pntSlots, fcuda.szPnts, fcuda.szHash );
    printf ( "grds: %d, t:%dx%d=%d, bufGrid:%d, Res: %dx%dx%d\n", fcuda.cells, fcuda.gridBlocks, fcuda.gridThreads, gridSlots, fcuda.szGrid, (int) fcuda.res.x, (int) fcuda.res.y, (int) fcuda.res.z );

    // Device allocations
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPnts, fcuda.szPnts ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPntSort, fcuda.szPnts ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufHash[0], fcuda.szHash ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufHash[1], fcuda.szHash ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufGrid, fcuda.szGrid ) );

    printf ( "POINTERS\n");
    printf ( "bufPnts: %p\n", bufPnts );
    printf ( "bufPntSort: %p\n", bufPntSort );
    printf ( "bufHash0: %p\n", bufHash[0] );
    printf ( "bufHash1: %p\n", bufHash[1] );
    printf ( "bufGrid: %p\n", bufGrid );

    // Publish the parameters to the device
    BT_GPU_SAFE_CALL ( cudaMemcpyToSymbol ( simData, &fcuda, sizeof(FluidParams) ) );
    cudaThreadSynchronize ();
}
// Store the SPH material parameters in fcuda, derive the dependent constants
// and upload fcuda to the simData symbol.
//   sim_scale  - stored in fcuda.sim_scale
//   smooth_rad - smoothing radius; r2 is its square
//   mass, rest - particle mass and rest density; pdist = (mass / rest)^(1/3)
//   stiff, visc - stored in fcuda.stiffness and fcuda.visc
// Kernel constants: poly6kern = 315 / (64 pi h^9), spikykern = -45 / (pi h^6),
// lapkern = 45 / (pi h^6), with h = smooth_rad and pi taken as 3.141592.
void FluidParamCUDA ( float sim_scale, float smooth_rad, float mass, float rest, float stiff, float visc )
{
    fcuda.sim_scale = sim_scale;
    fcuda.smooth_rad = smooth_rad;
    fcuda.r2 = smooth_rad * smooth_rad;
    fcuda.pmass = mass;
    fcuda.rest_dens = rest;
    fcuda.stiffness = stiff;
    fcuda.visc = visc;
    fcuda.pdist = pow ( fcuda.pmass / fcuda.rest_dens, 1/3.0f );

    // The denominators are evaluated in double, as in the inline expressions
    const double h9 = pow( smooth_rad, 9.0f);
    const double h6 = pow( smooth_rad, 6.0f);
    const double piH6 = 3.141592 * h6;
    fcuda.poly6kern = 315.0f / (64.0f * 3.141592 * h9 );
    fcuda.spikykern = -45.0f / piH6;
    fcuda.lapkern = 45.0f / piH6;

    BT_GPU_SAFE_CALL( cudaMemcpyToSymbol ( simData, &fcuda, sizeof(FluidParams) ) );
    cudaThreadSynchronize ();
}
// Upload particle records from the host into bufPnts.
//   data      - host buffer holding numPoints records of fcuda.stride bytes each
//   grid      - unused
//   numPoints - number of records to copy
void TransferToCUDA ( char* data, int* grid, int numPoints )
{
    const size_t numBytes = numPoints * fcuda.stride;
    BT_GPU_SAFE_CALL( cudaMemcpy ( bufPnts, data, numBytes, cudaMemcpyHostToDevice ) );
    cudaThreadSynchronize ();
}
// Download results from the device.
//   data      - host buffer receiving numPoints records of fcuda.stride bytes each,
//               read from bufPntSort (not bufPnts)
//   grid      - host buffer receiving fcuda.cells entries of bufGrid
//   numPoints - number of particle records to copy
void TransferFromCUDA ( char* data, int* grid, int numPoints )
{
    const size_t pointBytes = numPoints * fcuda.stride;
    const size_t gridBytes = fcuda.cells * sizeof(uint);

    BT_GPU_SAFE_CALL( cudaMemcpy ( data, bufPntSort, pointBytes, cudaMemcpyDeviceToHost ) );
    cudaThreadSynchronize ();
    BT_GPU_SAFE_CALL( cudaMemcpy ( grid, bufGrid, gridBytes, cudaMemcpyDeviceToHost ) );
}
void Grid_InsertParticlesCUDA ()
{
BT_GPU_SAFE_CALL( cudaMemset ( bufHash[0], 0, fcuda.szHash ) );
hashParticles<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPnts, (uint2*) bufHash[0], fcuda.pnts );
BT_GPU_CHECK_ERROR( "Kernel execution failed");
cudaThreadSynchronize ();
//int buf[20000];
/*printf ( "HASH: %d (%d)\n", fcuda.pnts, fcuda.numBlocks*fcuda.numThreads );
BT_GPU_SAFE_CALL( cudaMemcpy ( buf, bufHash[0], fcuda.pnts * 2*sizeof(uint), cudaMemcpyDeviceToHost ) );
//for (int n=0; n < fcuda.numBlocks*fcuda.numThreads; n++) {
for (int n=0; n < 100; n++) {
printf ( "%d: <%d,%d>\n", n, buf[n*2], buf[n*2+1] );
}*/
RadixSort( (KeyValuePair *) bufHash[0], (KeyValuePair *) bufHash[1], fcuda.pnts, 32);
BT_GPU_CHECK_ERROR( "Kernel execution failed");
cudaThreadSynchronize ();
/*printf ( "HASH: %d (%d)\n", fcuda.pnts, fcuda.numBlocks*fcuda.numThreads );
BT_GPU_SAFE_CALL( cudaMemcpy ( buf, bufHash[0], fcuda.pnts * 2*sizeof(uint), cudaMemcpyDeviceToHost ) );
//for (int n=0; n < fcuda.numBlocks*fcuda.numThreads; n++) {
for (int n=0; n < 100; n++) {
printf ( "%d: <%d,%d>\n", n, buf[n*2], buf[n*2+1] );
}*/
// insertParticles<<< fcuda.gridBlocks, fcuda.gridThreads>>> ( bufPnts, (uint2*) bufHash[0], bufGrid, fcuda.pnts, fcuda.cells );
BT_GPU_SAFE_CALL( cudaMemset ( bufGrid, NULL_HASH, fcuda.cells * sizeof(uint) ) );
insertParticlesRadix<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPnts, (uint2*) bufHash[0], bufGrid, bufPntSort, fcuda.pnts, fcuda.cells );
BT_GPU_CHECK_ERROR( "Kernel execution failed");
cudaThreadSynchronize ();
/*printf ( "GRID: %d\n", fcuda.cells );
BT_GPU_SAFE_CALL( cudaMemcpy ( buf, bufGrid, fcuda.cells * sizeof(uint), cudaMemcpyDeviceToHost ) );
*for (int n=0; n < 100; n++) {
printf ( "%d: %d\n", n, buf[n]);
}*/
}
void SPH_ComputePressureCUDA ()
{
computePressure<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPntSort, bufGrid, (uint2*) bufHash[0], fcuda.pnts );
BT_GPU_CHECK_ERROR( "Kernel execution failed");
cudaThreadSynchronize ();
}
// Launch the force kernel over bufPntSort using the particle launch configuration
// stored in fcuda, then check for errors and wait for the device.
// The neighbor-table kernel (computeForceNbr) is the one in use; the grid-based
// launch is kept below for reference:
//   computeForce<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPntSort, bufGrid, (uint2*) bufHash[0], fcuda.pnts );
void SPH_ComputeForceCUDA ()
{
    const int blocks = fcuda.numBlocks;
    const int threads = fcuda.numThreads;
    computeForceNbr<<< blocks, threads >>> ( bufPntSort, fcuda.pnts );
    BT_GPU_CHECK_ERROR( "Kernel execution failed");
    cudaThreadSynchronize ();
}
// Launch advanceParticles over bufPntSort, then check for errors and wait for the device.
//   dt - forwarded to the kernel (presumably the time step -- confirm)
//   ss - forwarded to the kernel (presumably the simulation scale -- confirm)
void SPH_AdvanceCUDA ( float dt, float ss )
{
    const int blocks = fcuda.numBlocks;
    const int threads = fcuda.numThreads;
    advanceParticles<<< blocks, threads >>> ( bufPntSort, fcuda.pnts, dt, ss );
    BT_GPU_CHECK_ERROR( "Kernel execution failed");
    cudaThreadSynchronize ();
}
} // extern C
//----------- Per frame: Malloc/Free, Host<->Device
// transfer point data to device
/*char* pntData;
int size = (fcuda.numBlocks*fcuda.numThreads) * stride;
cudaMalloc( (void**) &pntData, size);
cudaMemcpy( pntData, data, numPoints*stride, cudaMemcpyHostToDevice);
insertParticles<<< fcuda.numBlocks, fcuda.numThreads >>> ( pntData, stride, numPoints );
cudaMemcpy( data, pntData, numPoints*stride, cudaMemcpyDeviceToHost);
cudaFree( pntData );*/

View File

@@ -1,63 +1,63 @@
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <vector_types.h>
#include <driver_types.h> // for cudaStream_t
typedef unsigned int uint; // should be 4-bytes on CUDA
typedef unsigned char uchar; // should be 1-bytes on CUDA
// Simulation parameters shared between host and device.
// The host fills an instance and uploads it with cudaMemcpyToSymbol using
// sizeof(FluidParams), so the member layout must not differ between the two sides.
struct FluidParams {
int numThreads, numBlocks; // launch configuration computed from the particle count
int gridThreads, gridBlocks; // launch configuration computed from the cell count
int szPnts, szHash, szGrid; // allocation sizes in bytes for the point, hash and grid buffers
int stride, pnts, cells; // bytes per particle record, particle count, grid cell count
int chk; // stored from FluidSetupCUDA's chk argument; its use is not visible here
float smooth_rad, r2, sim_scale, visc; // r2 = smooth_rad * smooth_rad
float3 min, max, res, size, delta; // delta = res / size, per component
float pdist, pmass, rest_dens, stiffness; // pdist = (pmass / rest_dens)^(1/3)
float poly6kern, spikykern, lapkern; // kernel constants computed in FluidParamCUDA
};
// Host API of the CUDA fluid simulator, exported with C linkage.
// NOTE(review): the one-line summaries below are inferred from the names and
// signatures only; the definitions live in the host .cu file — confirm there.
extern "C"
{
// Presumably initializes the CUDA device.
void cudaInit(int argc, char **argv);
// Presumably releases the device buffers.
void FluidClearCUDA ();
// Presumably configures particle count, record stride and grid geometry.
void FluidSetupCUDA ( int num, int stride, float3 min, float3 max, float3 res, float3 size, int chk );
// Presumably sets the SPH material parameters.
void FluidParamCUDA ( float sim_scale, float smooth_rad, float mass, float rest, float stiff, float visc );
// Presumably copy particle and grid data host -> device and device -> host.
void TransferToCUDA ( char* data, int* grid, int numPoints );
void TransferFromCUDA ( char* data, int* grid, int numPoints );
// Presumably one call per simulation stage: grid insertion, pressure, force, integration.
void Grid_InsertParticlesCUDA ();
void SPH_ComputePressureCUDA ();
void SPH_ComputeForceCUDA ();
void SPH_AdvanceCUDA ( float dt, float ss );
}
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <vector_types.h>
#include <driver_types.h> // for cudaStream_t
typedef unsigned int uint; // expected to be 4 bytes on CUDA
typedef unsigned char uchar; // expected to be 1 byte on CUDA
// Simulation parameters shared by the host code and the device kernels.
// Field notes below describe how the kernels that include this header read
// the values; fields that those kernels do not read are marked as assumptions.
// Do not reorder fields: host and device must agree on the layout.
struct FluidParams {
// NOTE(review): presumably the <<<blocks, threads>>> configuration for per-particle kernels — confirm against the host launcher.
int numThreads, numBlocks;
// NOTE(review): presumably the launch configuration for per-grid-cell kernels — confirm.
int gridThreads, gridBlocks;
// NOTE(review): presumably allocation sizes of the particle, hash and grid buffers — confirm units.
int szPnts, szHash, szGrid;
// stride: byte distance between consecutive particle records (kernels address particle i at base + i*stride).
// pnts:   upper bound used when kernels walk the sorted particle list.
// cells:  NOTE(review): presumably the number of grid cells — confirm.
int stride, pnts, cells;
// NOTE(review): meaning of chk is not visible from the kernels — confirm with the host code.
int chk;
// smooth_rad: smoothing radius; r2: threshold compared against the squared scaled distance
// (presumably smooth_rad squared); sim_scale: factor applied to position differences;
// visc: viscosity coefficient multiplied with lapkern.
float smooth_rad, r2, sim_scale, visc;
// min: grid origin; res: cell count per axis; delta: factor converting a position offset to a cell index.
// NOTE(review): max and size are not read by the kernels — presumably grid upper corner and extent.
float3 min, max, res, size, delta;
// pmass, rest_dens, stiffness: used by computePressure to turn the kernel sum into density and pressure.
// NOTE(review): pdist is not read by the kernels — confirm meaning.
float pdist, pmass, rest_dens, stiffness;
// Precomputed smoothing-kernel coefficients used by the pressure and force kernels.
float poly6kern, spikykern, lapkern;
};
// Host API of the CUDA fluid simulator, exported with C linkage.
// NOTE(review): the one-line summaries below are inferred from the names and
// signatures only; the definitions live in the host .cu file — confirm there.
extern "C"
{
// Presumably initializes the CUDA device.
void cudaInit(int argc, char **argv);
// Presumably releases the device buffers.
void FluidClearCUDA ();
// Presumably configures particle count, record stride and grid geometry.
void FluidSetupCUDA ( int num, int stride, float3 min, float3 max, float3 res, float3 size, int chk );
// Presumably sets the SPH material parameters.
void FluidParamCUDA ( float sim_scale, float smooth_rad, float mass, float rest, float stiff, float visc );
// Presumably copy particle and grid data host -> device and device -> host.
void TransferToCUDA ( char* data, int* grid, int numPoints );
void TransferFromCUDA ( char* data, int* grid, int numPoints );
// Presumably one call per simulation stage: grid insertion, pressure, force, integration.
void Grid_InsertParticlesCUDA ();
void SPH_ComputePressureCUDA ();
void SPH_ComputeForceCUDA ();
void SPH_AdvanceCUDA ( float dt, float ss );
}

View File

@@ -1,402 +1,402 @@
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef _PARTICLES_KERNEL_H_
#define _PARTICLES_KERNEL_H_
#include <stdio.h>
#include <math.h>
#include "fluid_system_host.cuh"
// Number of particles the per-particle neighbor tables below are sized for.
#define TOTAL_THREADS 65536
// NOTE(review): presumably the intended threads-per-block; only referenced from commented-out code in this file — confirm.
#define BLOCK_THREADS 256
// Slots per particle in bufNeighbor/bufNdist. Slot 0 of bufNeighbor holds the running count, entries start at slot 1.
#define MAX_NBR 80
__constant__ FluidParams simData; // simulation data (on device)
// Per-particle neighbor indices (bufNeighbor) and matching distances (bufNdist),
// filled by contributePressure and read back by computeForceNbr.
__device__ int bufNeighbor[ TOTAL_THREADS*MAX_NBR ];
__device__ float bufNdist[ TOTAL_THREADS*MAX_NBR ];
// Pack color components (each scaled by 255) into one uint: r in bits 24-31, g in 16-23, b in 8-15.
// COLORA additionally stores a in bits 0-7. Presumably the components are expected in [0,1] — confirm.
#define COLOR(r,g,b) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) )
#define COLORA(r,g,b,a) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) | uint((a)*255.0f) )
// Hash written by hashParticles for slots past the particle count and for particles outside the grid.
#define NULL_HASH 333333
// Byte offsets of the fields inside one particle record (position sits at offset 0).
// NOTE(review): these presumably mirror the host-side Fluid struct layout — keep both in sync.
#define OFFSET_CLR 12
#define OFFSET_NEXT 16
#define OFFSET_VEL 20
#define OFFSET_VEVAL 32
#define OFFSET_PRESS 48
#define OFFSET_DENS 52
#define OFFSET_FORCE 56
// Computes the grid-cell hash of every particle.
//
// One thread per slot of bufHash. Each thread writes
//   bufHash[ndx] = ( hash, ndx )
// where hash is the linear cell index (z-major, then y, then x) of the particle,
// or NULL_HASH when the slot is past numPnt or the particle lies outside the grid.
// Padding slots are still written so that the whole hash buffer holds valid keys.
//
//   bufPnts - particle records, simData.stride bytes apart, position at offset 0
//   bufHash - output (hash, particle index) pairs, one per launched thread
//   numPnt  - number of valid particles
__global__ void hashParticles ( char* bufPnts, uint2* bufHash, int numPnt )
{
	uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// particle index
	uint hash = NULL_HASH;

	// Only touch particle memory for real particles; padding slots keep NULL_HASH.
	if ( ndx < numPnt ) {
		float3* pos = (float3*) (bufPnts + __mul24(ndx, simData.stride) );
		int gz = (pos->z - simData.min.z) * simData.delta.z ;
		int gy = (pos->y - simData.min.y) * simData.delta.y ;
		int gx = (pos->x - simData.min.x) * simData.delta.x ;

		// Each axis is tested against its own resolution (the x test previously
		// compared gz against res.x, letting out-of-range gx values through).
		bool outside = gx < 0 || gx > simData.res.x-1
					|| gy < 0 || gy > simData.res.y-1
					|| gz < 0 || gz > simData.res.z-1;
		if ( !outside )
			hash = __mul24(__mul24(gz, (int) simData.res.y)+gy, (int) simData.res.x) + gx;
	}
	bufHash[ndx] = make_uint2( hash, ndx );

	__syncthreads ();
}
// Builds a per-cell linked list of particles by brute force.
//
// One thread per grid cell. The thread resets its cell head to -1, then scans
// all numPnt hash entries; every particle whose hash equals this cell is pushed
// on the front of the list: its 'next' field (OFFSET_NEXT inside the particle
// record) receives the previous head and the cell head becomes that particle.
// numGrid is accepted but not used.
__global__ void insertParticles ( char* bufPnts, uint2* bufHash, int* bufGrid, int numPnt, int numGrid )
{
	const uint cell = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// grid cell index
	char* nextField = bufPnts + OFFSET_NEXT;		// base address of the 'next' fields
	int* head = bufGrid + cell;

	*head = -1;
	int n = 0;
	while ( n < numPnt ) {
		const uint2 entry = bufHash[n];
		if ( entry.x == cell ) {
			*(int*) (nextField + __mul24(entry.y, simData.stride)) = *head;
			*head = entry.y;
		}
		n++;
	}
	__syncthreads ();
}
// Builds the cell start table and gathers particles into hash order.
//
// One thread per entry of bufHash. The first-of-run test below only makes sense
// if bufHash is already ordered by hash.
// NOTE(review): the sort itself is not in this file — presumably a radix sort run by the host; confirm.
//  - For every entry whose hash differs from the previous entry's hash (and is not
//    NULL_HASH), bufGrid[hash] is set to the entry's position in the sorted list.
//  - For ndx < numPnt, the particle record referenced by the entry is copied from
//    bufPnts to slot ndx of bufPntSort; density, pressure and force are zeroed and
//    the 'next' field is overwritten with the cell hash.
// numGrid is accepted but not used.
__global__ void insertParticlesRadix ( char* bufPnts, uint2* bufHash, int* bufGrid, char* bufPntSort, int numPnt, int numGrid )
{
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
uint2 bufHashSort = bufHash[ndx]; // (cell hash, original particle index)
// sharedHash[t+1] holds the hash of thread t; sharedHash[0] holds the hash of the
// entry just before this block. The size of 257 limits blocks to 256 threads.
__shared__ uint sharedHash[257];
sharedHash[threadIdx.x+1] = bufHashSort.x;
if ( ndx > 0 && threadIdx.x == 0 ) {
// First thread of the block fetches the predecessor entry from global memory.
volatile uint2 prevData = bufHash[ndx-1];
sharedHash[0] = prevData.x;
}
// All hashes must be in shared memory before neighbors are compared.
__syncthreads ();
// sharedHash[threadIdx.x] is the previous entry's hash. For ndx == 0 it is never
// read because the ndx == 0 test short-circuits first.
if ( (ndx == 0 || bufHashSort.x != sharedHash[threadIdx.x]) && bufHashSort.x != NULL_HASH ) {
bufGrid [ bufHashSort.x ] = ndx;
}
if ( ndx < numPnt ) {
char* src = bufPnts + __mul24( bufHashSort.y, simData.stride );
char* dest = bufPntSort + __mul24( ndx, simData.stride );
*(float3*)(dest) = *(float3*)(src);
*(uint*) (dest + OFFSET_CLR) = *(uint*) (src + OFFSET_CLR);
*(float3*)(dest + OFFSET_VEL) = *(float3*)(src + OFFSET_VEL);
*(float3*)(dest + OFFSET_VEVAL) = *(float3*)(src + OFFSET_VEVAL);
// Derived quantities are reset here and recomputed by the pressure and force kernels.
*(float*) (dest + OFFSET_DENS) = 0.0;
*(float*) (dest + OFFSET_PRESS) = 0.0;
*(float3*) (dest + OFFSET_FORCE)= make_float3(0,0,0);
// The 'next' slot is reused to store the particle's cell hash.
*(int*) (dest + OFFSET_NEXT) = bufHashSort.x;
}
__syncthreads ();
}
//__shared__ int ncount [ BLOCK_THREADS ];
// Accumulates the density-kernel contribution of one grid cell onto particle pndx.
//
// Walks the sorted particle list starting at qndx for as long as the entries of
// bufHash still belong to cell grid_ndx. For every other particle closer than the
// smoothing radius (squared scaled distance < simData.r2) it adds (r2 - d^2)^3 to
// the returned sum and, while the neighbor table of pndx has room, records the
// neighbor index and distance in bufNeighbor/bufNdist.
//
//   pndx     - index of the particle being evaluated (slot 0 of its neighbor row is the count)
//   p        - position of that particle
//   qndx     - index of the first sorted particle of the cell
//   grid_ndx - cell being visited
//   bufPnts  - sorted particle records
//   bufHash  - sorted (hash, index) pairs
// Returns the unnormalized kernel sum for this cell.
__device__ float contributePressure ( int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
{
	float3* qpos;
	float3 dist;
	float dsq, c;
	float sum = 0.0f;
	float d = simData.sim_scale;
	int nbr = __mul24(pndx, MAX_NBR);

	// A negative start index marks an empty cell (e.g. the -1 written by
	// insertParticles); walking from it would read before the start of bufHash.
	if ( qndx < 0 ) return sum;

	for ( ; qndx < simData.pnts; qndx++ ) {
		// Stop at the first entry that belongs to a different cell.
		// NOTE(review): the second test compares the particle index with NULL_HASH;
		// it looks like the hash was meant, but the first test already covers that case.
		if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;

		if ( qndx != pndx ) {
			qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));

			dist.x = ( p->x - qpos->x )*d;		// dist in cm
			dist.y = ( p->y - qpos->y )*d;
			dist.z = ( p->z - qpos->z )*d;
			dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
			if ( dsq < simData.r2 ) {
				c = simData.r2 - dsq;
				sum += c * c * c;
				// Append to the neighbor row; slot 0 holds the next free slot.
				int cnt = bufNeighbor[nbr];
				if ( cnt < MAX_NBR ) {
					bufNeighbor[ nbr+cnt ] = qndx;
					bufNdist[ nbr+cnt ] = sqrtf(dsq);
					bufNeighbor[nbr] = cnt + 1;
				}
			}
		}
	}
	return sum;
}
/*if ( ncount[threadIdx.x] < MAX_NBR ) {
bufNeighbor [ nbr + ncount[threadIdx.x] ] = curr;
bufNdist [ nbr + ncount[threadIdx.x] ] = sqrt(dsq);
ncount[threadIdx.x]++;
}*/
// Computes density and pressure for every particle (one thread per particle).
//
// The thread locates the 2x2x2 block of grid cells whose lower corner contains
// (position - smoothing radius), sums the density kernel over the particles of
// those cells via contributePressure, and stores
//   pressure        = (density - rest_dens) * stiffness   at OFFSET_PRESS
//   inverse density = 1 / density                          at OFFSET_DENS
// It also resets the particle's neighbor row, which contributePressure fills.
//
//   bufPntSort - sorted particle records
//   bufGrid    - per-cell start index into the sorted list
//   bufHash    - sorted (hash, index) pairs
//   numPnt     - number of valid particles; threads beyond it exit immediately
__global__ void computePressure ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
{
	uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// particle index

	// Padding threads have no particle to process. The kernel contains no barrier,
	// so leaving early is safe.
	if ( ndx >= numPnt ) return;

	float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));

	// Find 2x2x2 grid cells
	// - Use registers only, no arrays (local-memory too slow)
	int3 cell;
	int gc0, gc1, gc2, gc3;
	// The upper z layer defaults to "absent" (-1) so these indices are well defined
	// when the particle sits in the last z layer of the grid.
	int gc4 = -1, gc5 = -1, gc6 = -1, gc7 = -1;
	float gs = simData.smooth_rad / simData.sim_scale;

	cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
	cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
	cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
	gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
	gc1 = gc0 + 1;
	gc2 = gc0 + simData.res.x;
	gc3 = gc2 + 1;
	if ( cell.z+1 < simData.res.z ) {
		gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
		gc5 = gc4 + 1;
		gc6 = gc4 + simData.res.x;
		gc7 = gc6 + 1;
	}
	// Drop the +x / +y neighbors when they would fall off the grid.
	if ( cell.x+1 >= simData.res.x ) {
		gc1 = -1; gc3 = -1;
		gc5 = -1; gc7 = -1;
	}
	if ( cell.y+1 >= simData.res.y ) {
		gc2 = -1; gc3 = -1;
		gc6 = -1; gc7 = -1;
	}

	// Sum Pressure
	float sum = 0.0f;
	bufNeighbor[ __mul24(ndx, MAX_NBR) ] = 1;		// slot 0 = next free slot; entries start at 1
	if (gc0 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
	if (gc1 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
	if (gc2 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
	if (gc3 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
	if (gc4 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
	if (gc5 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
	if (gc6 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
	if (gc7 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );

	// Compute Density & Pressure
	sum = sum * simData.pmass * simData.poly6kern;
	if ( sum == 0.0f ) sum = 1.0f;		// avoid dividing by zero for isolated particles
	*(float*) ((char*)pos + OFFSET_PRESS) = ( sum - simData.rest_dens ) * simData.stiffness;
	*(float*) ((char*)pos + OFFSET_DENS) = 1.0f / sum;
}
// Accumulates the pressure and viscosity force exerted on particle pndx by the
// particles of one grid cell.
//
// Walks the sorted particle list starting at qndx while the entries of bufHash
// still belong to cell grid_ndx, and for every other particle within the
// smoothing radius adds its contribution to 'force'.
//
//   force    - running force accumulator (updated in place)
//   pndx     - index of the particle being evaluated
//   p        - its record (position at offset 0; pressure, inverse density and
//              evaluation velocity are read at their OFFSET_* positions)
//   qndx     - index of the first sorted particle of the cell
//   grid_ndx - cell being visited
//   bufPnts  - sorted particle records
//   bufHash  - sorted (hash, index) pairs
__device__ void contributeForce ( float3& force, int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
{
	float press = *(float*) ((char*)p + OFFSET_PRESS);
	float dens = *(float*) ((char*)p + OFFSET_DENS);
	float3 veval = *(float3*) ((char*)p + OFFSET_VEVAL );
	float3 qeval, dist;
	float c, ndistj, dsq;
	float pterm, dterm, vterm;
	float3* qpos;
	float d = simData.sim_scale;

	vterm = simData.lapkern * simData.visc;

	// A negative start index marks an empty cell (e.g. the -1 written by
	// insertParticles); walking from it would read before the start of bufHash.
	if ( qndx < 0 ) return;

	for ( ; qndx < simData.pnts; qndx++ ) {
		// Stop at the first entry that belongs to a different cell.
		// NOTE(review): the second test compares the particle index with NULL_HASH;
		// it looks like the hash was meant, but the first test already covers that case.
		if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;

		if ( qndx != pndx ) {
			qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));

			dist.x = ( p->x - qpos->x )*d;		// dist in cm
			dist.y = ( p->y - qpos->y )*d;
			dist.z = ( p->z - qpos->z )*d;
			dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
			if ( dsq < simData.r2 ) {
				ndistj = sqrtf(dsq);
				c = ( simData.smooth_rad - ndistj );
				// dist is reused as computed above; it does not change in between.
				pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
				// OFFSET_DENS holds the inverse density written by computePressure.
				dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
				qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
				force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
				force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
				force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
			}
		}
	}
}
// Computes the SPH force on every particle by visiting grid cells (one thread per particle).
//
// The thread locates the 2x2x2 block of grid cells whose lower corner contains
// (position - smoothing radius), accumulates the force from the particles of
// those cells via contributeForce, and stores the result at OFFSET_FORCE.
//
//   bufPntSort - sorted particle records (pressure and inverse density already computed)
//   bufGrid    - per-cell start index into the sorted list
//   bufHash    - sorted (hash, index) pairs
//   numPnt     - number of valid particles; threads beyond it exit immediately
__global__ void computeForce ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
{
	uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// particle index

	// Padding threads have no particle to process. The kernel contains no barrier,
	// so leaving early is safe.
	if ( ndx >= numPnt ) return;

	float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));

	// Find 2x2x2 grid cells
	// - Use registers only, no arrays (local-memory too slow)
	int3 cell;
	int gc0, gc1, gc2, gc3;
	// The upper z layer defaults to "absent" (-1) so these indices are well defined
	// when the particle sits in the last z layer of the grid.
	int gc4 = -1, gc5 = -1, gc6 = -1, gc7 = -1;
	float gs = simData.smooth_rad / simData.sim_scale;

	cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
	cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
	cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
	gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
	gc1 = gc0 + 1;
	gc2 = gc0 + simData.res.x;
	gc3 = gc2 + 1;
	if ( cell.z+1 < simData.res.z ) {
		gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
		gc5 = gc4 + 1;
		gc6 = gc4 + simData.res.x;
		gc7 = gc6 + 1;
	}
	// Drop the +x / +y neighbors when they would fall off the grid.
	if ( cell.x+1 >= simData.res.x ) {
		gc1 = -1; gc3 = -1;
		gc5 = -1; gc7 = -1;
	}
	if ( cell.y+1 >= simData.res.y ) {
		gc2 = -1; gc3 = -1;
		gc6 = -1; gc7 = -1;
	}

	// Sum Force
	float3 force = make_float3(0,0,0);
	if (gc0 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
	if (gc1 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
	if (gc2 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
	if (gc3 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
	if (gc4 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
	if (gc5 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
	if (gc6 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
	if (gc7 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );

	// Update Force
	*(float3*) ((char*)pos + OFFSET_FORCE ) = force;
}
// Computes the SPH force on every particle from its stored neighbor row
// (one thread per particle; threads at or past numPnt do nothing).
//
// The neighbor row of particle ndx starts at ndx*MAX_NBR in bufNeighbor/bufNdist:
// slot 0 of bufNeighbor is the end marker, slots 1..end-1 hold neighbor indices
// and their distances. The summed pressure + viscosity force is written to the
// particle's OFFSET_FORCE field.
__global__ void computeForceNbr ( char* bufPntSort, int numPnt )
{
	const uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// particle index
	if ( ndx >= numPnt ) return;

	// This particle's record and the values that stay fixed over the loop
	char* self = bufPntSort + __mul24(ndx, simData.stride);
	float3* pos = (float3*) self;
	const float selfPress = *(float*) (self + OFFSET_PRESS);
	const float selfDens = *(float*) (self + OFFSET_DENS);
	const float3 selfVel = *(float3*) (self + OFFSET_VEVAL);
	const float scale = simData.sim_scale;
	const float viscTerm = simData.lapkern * simData.visc;

	const int row = __mul24(ndx, MAX_NBR);
	const int rowEnd = bufNeighbor[ row ];

	float3 total = make_float3(0,0,0);
	int j = 1;								// base 1, slot 0 = count
	while ( j < rowEnd ) {
		const float r = bufNdist[ row+j ];
		char* other = bufPntSort + __mul24( bufNeighbor[ row+j ], simData.stride);
		float3* qpos = (float3*) other;
		const float c = ( simData.smooth_rad - r );

		float3 diff;
		diff.x = ( pos->x - qpos->x )*scale;		// dist in cm
		diff.y = ( pos->y - qpos->y )*scale;
		diff.z = ( pos->z - qpos->z )*scale;

		const float pterm = -0.5f * c * simData.spikykern * ( selfPress + *(float*)(other+OFFSET_PRESS) ) / r;
		const float dterm = c * selfDens * *(float*)(other+OFFSET_DENS);
		const float3 qvel = *(float3*)(other+OFFSET_VEVAL);

		total.x += ( pterm * diff.x + viscTerm * ( qvel.x - selfVel.x )) * dterm;
		total.y += ( pterm * diff.y + viscTerm * ( qvel.y - selfVel.y )) * dterm;
		total.z += ( pterm * diff.z + viscTerm * ( qvel.z - selfVel.z )) * dterm;
		j++;
	}
	*(float3*) (self + OFFSET_FORCE ) = total;
}
// Integrates every particle one time step with a leapfrog scheme
// (one thread per particle; threads at or past numPnt do nothing).
//
// Reads the force at OFFSET_FORCE, scales it by the particle mass constant,
// adds gravity along -z, then updates the velocity (OFFSET_VEL), the
// evaluation velocity used for the next force pass (OFFSET_VEVAL) and the
// position (offset 0).
//
//   bufPntSort - sorted particle records
//   numPnt     - number of valid particles
//   dt         - time step
//   ss         - accepted for interface compatibility; not used by this kernel
//
// All arithmetic uses single-precision literals so nothing is promoted to double.
__global__ void advanceParticles ( char* bufPntSort, int numPnt, float dt, float ss )
{
	uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// particle index

	if ( ndx < numPnt ) {
		// Get particle vars
		float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
		float3* vel = (float3*) ((char*)pos + OFFSET_VEL );
		float3* vel_eval = (float3*) ((char*)pos + OFFSET_VEVAL );
		float3 accel = *(float3*) ((char*)pos + OFFSET_FORCE );
		float3 vcurr, vnext;

		// Leapfrog integration
		accel.x *= 0.00020543f;			// NOTE - To do: SPH_PMASS should be passed in
		accel.y *= 0.00020543f;
		accel.z *= 0.00020543f;
		accel.z -= 9.8f;				// gravity

		vcurr = *vel;
		vnext.x = accel.x*dt + vcurr.x;
		vnext.y = accel.y*dt + vcurr.y;
		vnext.z = accel.z*dt + vcurr.z;			// v(t+1/2) = v(t-1/2) + a(t) dt

		// v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5, used to compute forces later
		accel.x = (vcurr.x + vnext.x) * 0.5f;
		accel.y = (vcurr.y + vnext.y) * 0.5f;
		accel.z = (vcurr.z + vnext.z) * 0.5f;

		*vel_eval = accel;
		*vel = vnext;

		// Positions are stored unscaled, so convert the step before moving.
		dt /= simData.sim_scale;
		vnext.x = pos->x + vnext.x*dt;
		vnext.y = pos->y + vnext.y*dt;
		vnext.z = pos->z + vnext.z*dt;
		*pos = vnext;					// p(t+1) = p(t) + v(t+1/2) dt
	}

	__syncthreads ();
}
#endif
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef _PARTICLES_KERNEL_H_
#define _PARTICLES_KERNEL_H_
#include <stdio.h>
#include <math.h>
#include "fluid_system_host.cuh"
// Number of particles the per-particle neighbor tables below are sized for.
#define TOTAL_THREADS 65536
// NOTE(review): presumably the intended threads-per-block; only referenced from commented-out code in this file — confirm.
#define BLOCK_THREADS 256
// Slots per particle in bufNeighbor/bufNdist. Slot 0 of bufNeighbor holds the running count, entries start at slot 1.
#define MAX_NBR 80
__constant__ FluidParams simData; // simulation data (on device)
// Per-particle neighbor indices (bufNeighbor) and matching distances (bufNdist),
// filled by contributePressure and read back by computeForceNbr.
__device__ int bufNeighbor[ TOTAL_THREADS*MAX_NBR ];
__device__ float bufNdist[ TOTAL_THREADS*MAX_NBR ];
// Pack color components (each scaled by 255) into one uint: r in bits 24-31, g in 16-23, b in 8-15.
// COLORA additionally stores a in bits 0-7. Presumably the components are expected in [0,1] — confirm.
#define COLOR(r,g,b) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) )
#define COLORA(r,g,b,a) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) | uint((a)*255.0f) )
// Hash written by hashParticles for slots past the particle count and for particles outside the grid.
#define NULL_HASH 333333
// Byte offsets of the fields inside one particle record (position sits at offset 0).
// NOTE(review): these presumably mirror the host-side Fluid struct layout — keep both in sync.
#define OFFSET_CLR 12
#define OFFSET_NEXT 16
#define OFFSET_VEL 20
#define OFFSET_VEVAL 32
#define OFFSET_PRESS 48
#define OFFSET_DENS 52
#define OFFSET_FORCE 56
// Computes the grid-cell hash of every particle.
//
// One thread per slot of bufHash. Each thread writes
//   bufHash[ndx] = ( hash, ndx )
// where hash is the linear cell index (z-major, then y, then x) of the particle,
// or NULL_HASH when the slot is past numPnt or the particle lies outside the grid.
// Padding slots are still written so that the whole hash buffer holds valid keys.
//
//   bufPnts - particle records, simData.stride bytes apart, position at offset 0
//   bufHash - output (hash, particle index) pairs, one per launched thread
//   numPnt  - number of valid particles
__global__ void hashParticles ( char* bufPnts, uint2* bufHash, int numPnt )
{
	uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// particle index
	uint hash = NULL_HASH;

	// Only touch particle memory for real particles; padding slots keep NULL_HASH.
	if ( ndx < numPnt ) {
		float3* pos = (float3*) (bufPnts + __mul24(ndx, simData.stride) );
		int gz = (pos->z - simData.min.z) * simData.delta.z ;
		int gy = (pos->y - simData.min.y) * simData.delta.y ;
		int gx = (pos->x - simData.min.x) * simData.delta.x ;

		// Each axis is tested against its own resolution (the x test previously
		// compared gz against res.x, letting out-of-range gx values through).
		bool outside = gx < 0 || gx > simData.res.x-1
					|| gy < 0 || gy > simData.res.y-1
					|| gz < 0 || gz > simData.res.z-1;
		if ( !outside )
			hash = __mul24(__mul24(gz, (int) simData.res.y)+gy, (int) simData.res.x) + gx;
	}
	bufHash[ndx] = make_uint2( hash, ndx );

	__syncthreads ();
}
// Builds a per-cell linked list of particles by brute force.
//
// One thread per grid cell. The thread resets its cell head to -1, then scans
// all numPnt hash entries; every particle whose hash equals this cell is pushed
// on the front of the list: its 'next' field (OFFSET_NEXT inside the particle
// record) receives the previous head and the cell head becomes that particle.
// numGrid is accepted but not used.
__global__ void insertParticles ( char* bufPnts, uint2* bufHash, int* bufGrid, int numPnt, int numGrid )
{
	const uint cell = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// grid cell index
	char* nextField = bufPnts + OFFSET_NEXT;		// base address of the 'next' fields
	int* head = bufGrid + cell;

	*head = -1;
	int n = 0;
	while ( n < numPnt ) {
		const uint2 entry = bufHash[n];
		if ( entry.x == cell ) {
			*(int*) (nextField + __mul24(entry.y, simData.stride)) = *head;
			*head = entry.y;
		}
		n++;
	}
	__syncthreads ();
}
// Builds the cell start table and gathers particles into hash order.
//
// One thread per entry of bufHash. The first-of-run test below only makes sense
// if bufHash is already ordered by hash.
// NOTE(review): the sort itself is not in this file — presumably a radix sort run by the host; confirm.
//  - For every entry whose hash differs from the previous entry's hash (and is not
//    NULL_HASH), bufGrid[hash] is set to the entry's position in the sorted list.
//  - For ndx < numPnt, the particle record referenced by the entry is copied from
//    bufPnts to slot ndx of bufPntSort; density, pressure and force are zeroed and
//    the 'next' field is overwritten with the cell hash.
// numGrid is accepted but not used.
__global__ void insertParticlesRadix ( char* bufPnts, uint2* bufHash, int* bufGrid, char* bufPntSort, int numPnt, int numGrid )
{
uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; // particle index
uint2 bufHashSort = bufHash[ndx]; // (cell hash, original particle index)
// sharedHash[t+1] holds the hash of thread t; sharedHash[0] holds the hash of the
// entry just before this block. The size of 257 limits blocks to 256 threads.
__shared__ uint sharedHash[257];
sharedHash[threadIdx.x+1] = bufHashSort.x;
if ( ndx > 0 && threadIdx.x == 0 ) {
// First thread of the block fetches the predecessor entry from global memory.
volatile uint2 prevData = bufHash[ndx-1];
sharedHash[0] = prevData.x;
}
// All hashes must be in shared memory before neighbors are compared.
__syncthreads ();
// sharedHash[threadIdx.x] is the previous entry's hash. For ndx == 0 it is never
// read because the ndx == 0 test short-circuits first.
if ( (ndx == 0 || bufHashSort.x != sharedHash[threadIdx.x]) && bufHashSort.x != NULL_HASH ) {
bufGrid [ bufHashSort.x ] = ndx;
}
if ( ndx < numPnt ) {
char* src = bufPnts + __mul24( bufHashSort.y, simData.stride );
char* dest = bufPntSort + __mul24( ndx, simData.stride );
*(float3*)(dest) = *(float3*)(src);
*(uint*) (dest + OFFSET_CLR) = *(uint*) (src + OFFSET_CLR);
*(float3*)(dest + OFFSET_VEL) = *(float3*)(src + OFFSET_VEL);
*(float3*)(dest + OFFSET_VEVAL) = *(float3*)(src + OFFSET_VEVAL);
// Derived quantities are reset here and recomputed by the pressure and force kernels.
*(float*) (dest + OFFSET_DENS) = 0.0;
*(float*) (dest + OFFSET_PRESS) = 0.0;
*(float3*) (dest + OFFSET_FORCE)= make_float3(0,0,0);
// The 'next' slot is reused to store the particle's cell hash.
*(int*) (dest + OFFSET_NEXT) = bufHashSort.x;
}
__syncthreads ();
}
//__shared__ int ncount [ BLOCK_THREADS ];
// Accumulates the density-kernel contribution of one grid cell onto particle pndx.
//
// Walks the sorted particle list starting at qndx for as long as the entries of
// bufHash still belong to cell grid_ndx. For every other particle closer than the
// smoothing radius (squared scaled distance < simData.r2) it adds (r2 - d^2)^3 to
// the returned sum and, while the neighbor table of pndx has room, records the
// neighbor index and distance in bufNeighbor/bufNdist.
//
//   pndx     - index of the particle being evaluated (slot 0 of its neighbor row is the count)
//   p        - position of that particle
//   qndx     - index of the first sorted particle of the cell
//   grid_ndx - cell being visited
//   bufPnts  - sorted particle records
//   bufHash  - sorted (hash, index) pairs
// Returns the unnormalized kernel sum for this cell.
__device__ float contributePressure ( int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
{
	float3* qpos;
	float3 dist;
	float dsq, c;
	float sum = 0.0f;
	float d = simData.sim_scale;
	int nbr = __mul24(pndx, MAX_NBR);

	// A negative start index marks an empty cell (e.g. the -1 written by
	// insertParticles); walking from it would read before the start of bufHash.
	if ( qndx < 0 ) return sum;

	for ( ; qndx < simData.pnts; qndx++ ) {
		// Stop at the first entry that belongs to a different cell.
		// NOTE(review): the second test compares the particle index with NULL_HASH;
		// it looks like the hash was meant, but the first test already covers that case.
		if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;

		if ( qndx != pndx ) {
			qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));

			dist.x = ( p->x - qpos->x )*d;		// dist in cm
			dist.y = ( p->y - qpos->y )*d;
			dist.z = ( p->z - qpos->z )*d;
			dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
			if ( dsq < simData.r2 ) {
				c = simData.r2 - dsq;
				sum += c * c * c;
				// Append to the neighbor row; slot 0 holds the next free slot.
				int cnt = bufNeighbor[nbr];
				if ( cnt < MAX_NBR ) {
					bufNeighbor[ nbr+cnt ] = qndx;
					bufNdist[ nbr+cnt ] = sqrtf(dsq);
					bufNeighbor[nbr] = cnt + 1;
				}
			}
		}
	}
	return sum;
}
/*if ( ncount[threadIdx.x] < MAX_NBR ) {
bufNeighbor [ nbr + ncount[threadIdx.x] ] = curr;
bufNdist [ nbr + ncount[threadIdx.x] ] = sqrt(dsq);
ncount[threadIdx.x]++;
}*/
// Computes density and pressure for every particle (one thread per particle).
//
// The thread locates the 2x2x2 block of grid cells whose lower corner contains
// (position - smoothing radius), sums the density kernel over the particles of
// those cells via contributePressure, and stores
//   pressure        = (density - rest_dens) * stiffness   at OFFSET_PRESS
//   inverse density = 1 / density                          at OFFSET_DENS
// It also resets the particle's neighbor row, which contributePressure fills.
//
//   bufPntSort - sorted particle records
//   bufGrid    - per-cell start index into the sorted list
//   bufHash    - sorted (hash, index) pairs
//   numPnt     - number of valid particles; threads beyond it exit immediately
__global__ void computePressure ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
{
	uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// particle index

	// Padding threads have no particle to process. The kernel contains no barrier,
	// so leaving early is safe.
	if ( ndx >= numPnt ) return;

	float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));

	// Find 2x2x2 grid cells
	// - Use registers only, no arrays (local-memory too slow)
	int3 cell;
	int gc0, gc1, gc2, gc3;
	// The upper z layer defaults to "absent" (-1) so these indices are well defined
	// when the particle sits in the last z layer of the grid.
	int gc4 = -1, gc5 = -1, gc6 = -1, gc7 = -1;
	float gs = simData.smooth_rad / simData.sim_scale;

	cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
	cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
	cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
	gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
	gc1 = gc0 + 1;
	gc2 = gc0 + simData.res.x;
	gc3 = gc2 + 1;
	if ( cell.z+1 < simData.res.z ) {
		gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
		gc5 = gc4 + 1;
		gc6 = gc4 + simData.res.x;
		gc7 = gc6 + 1;
	}
	// Drop the +x / +y neighbors when they would fall off the grid.
	if ( cell.x+1 >= simData.res.x ) {
		gc1 = -1; gc3 = -1;
		gc5 = -1; gc7 = -1;
	}
	if ( cell.y+1 >= simData.res.y ) {
		gc2 = -1; gc3 = -1;
		gc6 = -1; gc7 = -1;
	}

	// Sum Pressure
	float sum = 0.0f;
	bufNeighbor[ __mul24(ndx, MAX_NBR) ] = 1;		// slot 0 = next free slot; entries start at 1
	if (gc0 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
	if (gc1 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
	if (gc2 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
	if (gc3 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
	if (gc4 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
	if (gc5 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
	if (gc6 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
	if (gc7 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );

	// Compute Density & Pressure
	sum = sum * simData.pmass * simData.poly6kern;
	if ( sum == 0.0f ) sum = 1.0f;		// avoid dividing by zero for isolated particles
	*(float*) ((char*)pos + OFFSET_PRESS) = ( sum - simData.rest_dens ) * simData.stiffness;
	*(float*) ((char*)pos + OFFSET_DENS) = 1.0f / sum;
}
// Accumulates the pressure and viscosity force exerted on particle pndx by the
// particles of one grid cell.
//
// Walks the sorted particle list starting at qndx while the entries of bufHash
// still belong to cell grid_ndx, and for every other particle within the
// smoothing radius adds its contribution to 'force'.
//
//   force    - running force accumulator (updated in place)
//   pndx     - index of the particle being evaluated
//   p        - its record (position at offset 0; pressure, inverse density and
//              evaluation velocity are read at their OFFSET_* positions)
//   qndx     - index of the first sorted particle of the cell
//   grid_ndx - cell being visited
//   bufPnts  - sorted particle records
//   bufHash  - sorted (hash, index) pairs
__device__ void contributeForce ( float3& force, int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
{
	float press = *(float*) ((char*)p + OFFSET_PRESS);
	float dens = *(float*) ((char*)p + OFFSET_DENS);
	float3 veval = *(float3*) ((char*)p + OFFSET_VEVAL );
	float3 qeval, dist;
	float c, ndistj, dsq;
	float pterm, dterm, vterm;
	float3* qpos;
	float d = simData.sim_scale;

	vterm = simData.lapkern * simData.visc;

	// A negative start index marks an empty cell (e.g. the -1 written by
	// insertParticles); walking from it would read before the start of bufHash.
	if ( qndx < 0 ) return;

	for ( ; qndx < simData.pnts; qndx++ ) {
		// Stop at the first entry that belongs to a different cell.
		// NOTE(review): the second test compares the particle index with NULL_HASH;
		// it looks like the hash was meant, but the first test already covers that case.
		if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;

		if ( qndx != pndx ) {
			qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));

			dist.x = ( p->x - qpos->x )*d;		// dist in cm
			dist.y = ( p->y - qpos->y )*d;
			dist.z = ( p->z - qpos->z )*d;
			dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
			if ( dsq < simData.r2 ) {
				ndistj = sqrtf(dsq);
				c = ( simData.smooth_rad - ndistj );
				// dist is reused as computed above; it does not change in between.
				pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
				// OFFSET_DENS holds the inverse density written by computePressure.
				dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
				qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
				force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
				force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
				force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
			}
		}
	}
}
// Computes the SPH force on every particle by visiting grid cells (one thread per particle).
//
// The thread locates the 2x2x2 block of grid cells whose lower corner contains
// (position - smoothing radius), accumulates the force from the particles of
// those cells via contributeForce, and stores the result at OFFSET_FORCE.
//
//   bufPntSort - sorted particle records (pressure and inverse density already computed)
//   bufGrid    - per-cell start index into the sorted list
//   bufHash    - sorted (hash, index) pairs
//   numPnt     - number of valid particles; threads beyond it exit immediately
__global__ void computeForce ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
{
	uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// particle index

	// Padding threads have no particle to process. The kernel contains no barrier,
	// so leaving early is safe.
	if ( ndx >= numPnt ) return;

	float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));

	// Find 2x2x2 grid cells
	// - Use registers only, no arrays (local-memory too slow)
	int3 cell;
	int gc0, gc1, gc2, gc3;
	// The upper z layer defaults to "absent" (-1) so these indices are well defined
	// when the particle sits in the last z layer of the grid.
	int gc4 = -1, gc5 = -1, gc6 = -1, gc7 = -1;
	float gs = simData.smooth_rad / simData.sim_scale;

	cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
	cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
	cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
	gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
	gc1 = gc0 + 1;
	gc2 = gc0 + simData.res.x;
	gc3 = gc2 + 1;
	if ( cell.z+1 < simData.res.z ) {
		gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
		gc5 = gc4 + 1;
		gc6 = gc4 + simData.res.x;
		gc7 = gc6 + 1;
	}
	// Drop the +x / +y neighbors when they would fall off the grid.
	if ( cell.x+1 >= simData.res.x ) {
		gc1 = -1; gc3 = -1;
		gc5 = -1; gc7 = -1;
	}
	if ( cell.y+1 >= simData.res.y ) {
		gc2 = -1; gc3 = -1;
		gc6 = -1; gc7 = -1;
	}

	// Sum Force
	float3 force = make_float3(0,0,0);
	if (gc0 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
	if (gc1 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
	if (gc2 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
	if (gc3 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
	if (gc4 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
	if (gc5 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
	if (gc6 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
	if (gc7 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );

	// Update Force
	*(float3*) ((char*)pos + OFFSET_FORCE ) = force;
}
// Computes the SPH force on every particle from its stored neighbor row
// (one thread per particle; threads at or past numPnt do nothing).
//
// The neighbor row of particle ndx starts at ndx*MAX_NBR in bufNeighbor/bufNdist:
// slot 0 of bufNeighbor is the end marker, slots 1..end-1 hold neighbor indices
// and their distances. The summed pressure + viscosity force is written to the
// particle's OFFSET_FORCE field.
__global__ void computeForceNbr ( char* bufPntSort, int numPnt )
{
	const uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// particle index
	if ( ndx >= numPnt ) return;

	// This particle's record and the values that stay fixed over the loop
	char* self = bufPntSort + __mul24(ndx, simData.stride);
	float3* pos = (float3*) self;
	const float selfPress = *(float*) (self + OFFSET_PRESS);
	const float selfDens = *(float*) (self + OFFSET_DENS);
	const float3 selfVel = *(float3*) (self + OFFSET_VEVAL);
	const float scale = simData.sim_scale;
	const float viscTerm = simData.lapkern * simData.visc;

	const int row = __mul24(ndx, MAX_NBR);
	const int rowEnd = bufNeighbor[ row ];

	float3 total = make_float3(0,0,0);
	int j = 1;								// base 1, slot 0 = count
	while ( j < rowEnd ) {
		const float r = bufNdist[ row+j ];
		char* other = bufPntSort + __mul24( bufNeighbor[ row+j ], simData.stride);
		float3* qpos = (float3*) other;
		const float c = ( simData.smooth_rad - r );

		float3 diff;
		diff.x = ( pos->x - qpos->x )*scale;		// dist in cm
		diff.y = ( pos->y - qpos->y )*scale;
		diff.z = ( pos->z - qpos->z )*scale;

		const float pterm = -0.5f * c * simData.spikykern * ( selfPress + *(float*)(other+OFFSET_PRESS) ) / r;
		const float dterm = c * selfDens * *(float*)(other+OFFSET_DENS);
		const float3 qvel = *(float3*)(other+OFFSET_VEVAL);

		total.x += ( pterm * diff.x + viscTerm * ( qvel.x - selfVel.x )) * dterm;
		total.y += ( pterm * diff.y + viscTerm * ( qvel.y - selfVel.y )) * dterm;
		total.z += ( pterm * diff.z + viscTerm * ( qvel.z - selfVel.z )) * dterm;
		j++;
	}
	*(float3*) (self + OFFSET_FORCE ) = total;
}
// Integrates every particle one time step with a leapfrog scheme
// (one thread per particle; threads at or past numPnt do nothing).
//
// Reads the force at OFFSET_FORCE, scales it by the particle mass constant,
// adds gravity along -z, then updates the velocity (OFFSET_VEL), the
// evaluation velocity used for the next force pass (OFFSET_VEVAL) and the
// position (offset 0).
//
//   bufPntSort - sorted particle records
//   numPnt     - number of valid particles
//   dt         - time step
//   ss         - accepted for interface compatibility; not used by this kernel
//
// All arithmetic uses single-precision literals so nothing is promoted to double.
__global__ void advanceParticles ( char* bufPntSort, int numPnt, float dt, float ss )
{
	uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;		// particle index

	if ( ndx < numPnt ) {
		// Get particle vars
		float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
		float3* vel = (float3*) ((char*)pos + OFFSET_VEL );
		float3* vel_eval = (float3*) ((char*)pos + OFFSET_VEVAL );
		float3 accel = *(float3*) ((char*)pos + OFFSET_FORCE );
		float3 vcurr, vnext;

		// Leapfrog integration
		accel.x *= 0.00020543f;			// NOTE - To do: SPH_PMASS should be passed in
		accel.y *= 0.00020543f;
		accel.z *= 0.00020543f;
		accel.z -= 9.8f;				// gravity

		vcurr = *vel;
		vnext.x = accel.x*dt + vcurr.x;
		vnext.y = accel.y*dt + vcurr.y;
		vnext.z = accel.z*dt + vcurr.z;			// v(t+1/2) = v(t-1/2) + a(t) dt

		// v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5, used to compute forces later
		accel.x = (vcurr.x + vnext.x) * 0.5f;
		accel.y = (vcurr.y + vnext.y) * 0.5f;
		accel.z = (vcurr.z + vnext.z) * 0.5f;

		*vel_eval = accel;
		*vel = vnext;

		// Positions are stored unscaled, so convert the step before moving.
		dt /= simData.sim_scale;
		vnext.x = pos->x + vnext.x*dt;
		vnext.y = pos->y + vnext.y*dt;
		vnext.z = pos->z + vnext.z*dt;
		*pos = vnext;					// p(t+1) = p(t) + v(t+1/2) dt
	}

	__syncthreads ();
}
#endif

View File

@@ -1,45 +1,45 @@
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2009. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef _PARTICLES_KERNEL_H_
#define _PARTICLES_KERNEL_H_
#include <stdio.h>
#include <math.h>
#include "cutil_math.h"
#include "math_constants.h"
// Adds every particle to the grid cell that contains it.
// Each thread handles the particle record found pntStride bytes times its
// global thread index into pntData, reads the leading float4 of that record,
// maps it to a grid coordinate with calcGridPos and registers the thread's
// index in that cell through addParticleToCell.
__global__ void insertParticles ( char* pntData, uint pntStride )
{
	int tid = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;

	// leading 16 bytes of this thread's particle record
	float4* record = (float4*) (pntData + tid*pntStride);
	float4 p = *record;

	// get address in grid
	int3 cellPos = calcGridPos(p);

	addParticleToCell(cellPos, tid, gridCounters, gridCells);
}
#endif
/*
FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
Copyright (C) 2009. Rama Hoetzlein, http://www.rchoetzlein.com
ZLib license
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef _PARTICLES_KERNEL_H_
#define _PARTICLES_KERNEL_H_
#include <stdio.h>
#include <math.h>
#include "cutil_math.h"
#include "math_constants.h"
// Insert particles in grid.
//
// Each thread handles the particle whose index equals its flat global thread
// index: it loads the leading float4 of that particle's record, maps it to a
// grid cell with calcGridPos() and registers the particle in that cell via
// addParticleToCell().
//
// pntData   - base address of the particle records, addressed in bytes
// pntStride - byte distance between two consecutive particle records
//
// NOTE(review): there is no bounds guard, so the launch presumably has to
// cover exactly the number of particles stored in pntData - confirm at the
// call site.
__global__ void insertParticles ( char* pntData, uint pntStride )
{
    // Flat 1D thread index (24-bit multiply intrinsic); doubles as particle index
    int particle = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;

    // Step to this particle's record and read its first 16 bytes as a float4
    // (presumably the particle position - confirm against the record layout)
    char* record = pntData + particle*pntStride;
    float4 pos = *(float4*) record;

    // Resolve the grid cell for that value and add the particle to the cell
    int3 cell = calcGridPos(pos);
    addParticleToCell(cell, particle, gridCounters, gridCells);
}
#endif

View File

@@ -1,79 +1,79 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/* Radixsort project with key/value and arbitrary dataset size support
* which demonstrates the use of CUDA in a multi phase sorting
* computation.
* Host code.
*/
#include "radixsort.cuh"
#include "radixsort_kernel.cu"
extern "C"
{
////////////////////////////////////////////////////////////////////////////////
//! Perform a radix sort on an array of key/value pairs.
//! One RadixSum / RadixPrefixSum / RadixAddOffsetsAndShuffle round is launched
//! for every RADIX bits of the key, ping-ponging between the two arrays.
//!
//! @param pData0    input and output array - data will be sorted
//! @param pData1    additional array to allow ping pong computation
//! @param elements  number of elements to sort
//! @param bits      upper bound on the bit shift: rounds run for
//!                  shift = 0, RADIX, 2*RADIX, ... while shift < bits
//!
//! NOTE(review): the two array pointers are exchanged after every round, so
//! the final output presumably ends up in the caller's pData0 only when the
//! number of rounds is even - confirm against the shuffle kernel.
////////////////////////////////////////////////////////////////////////////////
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits)
{
    // Pad the element count up to the next multiple of 3072 (left unchanged
    // when it already is one); kernels receive both the real and padded count.
    const uint remainder = elements % 3072;
    const uint elements_rounded_to_3072 =
        (remainder == 0) ? elements : elements + (3072 - remainder);

    // Local ping-pong pointers: 'src' is read by the current round, 'dst' is
    // handed to the shuffle kernel as the second array.
    KeyValuePair *src = pData0;
    KeyValuePair *dst = pData1;

    // Walk over the key RADIX bits at a time, sorting on each digit in turn
    uint shift = 0;
    while (shift < bits)
    {
        // Stage 1: per radix group sums of the radix counts
        RadixSum<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, GRFSIZE>>>(src, elements, elements_rounded_to_3072, shift);

        // Stage 2: prefix sum in radix groups, then between groups throughout a block
        RadixPrefixSum<<<PREFIX_NUM_BLOCKS, PREFIX_NUM_THREADS_PER_BLOCK, PREFIX_GRFSIZE>>>();

        // Stage 3: sum the block offsets and shuffle the data into bins
        RadixAddOffsetsAndShuffle<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, SHUFFLE_GRFSIZE>>>(src, dst, elements, elements_rounded_to_3072, shift);

        // Exchange the roles of the two arrays for the next round
        KeyValuePair *previousSrc = src;
        src = dst;
        dst = previousSrc;

        shift += RADIX;
    }
}
}
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/* Radixsort project with key/value and arbitrary dataset size support
* which demonstrates the use of CUDA in a multi phase sorting
* computation.
* Host code.
*/
#include "radixsort.cuh"
#include "radixsort_kernel.cu"
extern "C"
{
////////////////////////////////////////////////////////////////////////////////
//! Perform a radix sort on an array of key/value pairs.
//! One RadixSum / RadixPrefixSum / RadixAddOffsetsAndShuffle round is launched
//! for every RADIX bits of the key, ping-ponging between the two arrays.
//!
//! @param pData0    input and output array - data will be sorted
//! @param pData1    additional array to allow ping pong computation
//! @param elements  number of elements to sort
//! @param bits      upper bound on the bit shift: rounds run for
//!                  shift = 0, RADIX, 2*RADIX, ... while shift < bits
//!
//! NOTE(review): the two array pointers are exchanged after every round, so
//! the final output presumably ends up in the caller's pData0 only when the
//! number of rounds is even - confirm against the shuffle kernel.
////////////////////////////////////////////////////////////////////////////////
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits)
{
    // Pad the element count up to the next multiple of 3072 (left unchanged
    // when it already is one); kernels receive both the real and padded count.
    const uint remainder = elements % 3072;
    const uint elements_rounded_to_3072 =
        (remainder == 0) ? elements : elements + (3072 - remainder);

    // Local ping-pong pointers: 'src' is read by the current round, 'dst' is
    // handed to the shuffle kernel as the second array.
    KeyValuePair *src = pData0;
    KeyValuePair *dst = pData1;

    // Walk over the key RADIX bits at a time, sorting on each digit in turn
    uint shift = 0;
    while (shift < bits)
    {
        // Stage 1: per radix group sums of the radix counts
        RadixSum<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, GRFSIZE>>>(src, elements, elements_rounded_to_3072, shift);

        // Stage 2: prefix sum in radix groups, then between groups throughout a block
        RadixPrefixSum<<<PREFIX_NUM_BLOCKS, PREFIX_NUM_THREADS_PER_BLOCK, PREFIX_GRFSIZE>>>();

        // Stage 3: sum the block offsets and shuffle the data into bins
        RadixAddOffsetsAndShuffle<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, SHUFFLE_GRFSIZE>>>(src, dst, elements, elements_rounded_to_3072, shift);

        // Exchange the roles of the two arrays for the next round
        KeyValuePair *previousSrc = src;
        src = dst;
        dst = previousSrc;

        shift += RADIX;
    }
}
}

View File

@@ -1,63 +1,63 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/* Radixsort project which demonstrates the use of CUDA in a multi phase
* sorting computation.
* Type definitions.
*/
#ifndef _RADIXSORT_H_
#define _RADIXSORT_H_
#include <host_defines.h>
#define SYNCIT __syncthreads()
// Use 16 bit keys/values
#define SIXTEEN 0
typedef unsigned int uint;
typedef unsigned short ushort;
// Key/value element handled by the radix sort. The SIXTEEN switch selects the
// layout: unsigned short key and value with 4-byte alignment when it is
// non-zero, unsigned int key and value with 8-byte alignment otherwise.
#if SIXTEEN
typedef struct __align__(4) {
    ushort key;    // sort key
    ushort value;  // payload carried along with the key
} KeyValuePair;
#else
typedef struct __align__(8) {
    uint key;      // sort key
    uint value;    // payload carried along with the key
} KeyValuePair;
#endif
// Radix sort entry point, declared with C linkage.
//   pData0   - input and output array of key/value pairs
//   pData1   - additional array used for ping-pong computation
//   elements - number of elements to sort
//   bits     - presumably the number of key bits to sort on - confirm at the
//              definition
extern "C" {
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits);
}
#endif // #ifndef _RADIXSORT_H_
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/* Radixsort project which demonstrates the use of CUDA in a multi phase
* sorting computation.
* Type definitions.
*/
#ifndef _RADIXSORT_H_
#define _RADIXSORT_H_
#include <host_defines.h>
#define SYNCIT __syncthreads()
// Use 16 bit keys/values
#define SIXTEEN 0
typedef unsigned int uint;
typedef unsigned short ushort;
// Key/value element handled by the radix sort. The SIXTEEN switch selects the
// layout: unsigned short key and value with 4-byte alignment when it is
// non-zero, unsigned int key and value with 8-byte alignment otherwise.
#if SIXTEEN
typedef struct __align__(4) {
    ushort key;    // sort key
    ushort value;  // payload carried along with the key
} KeyValuePair;
#else
typedef struct __align__(8) {
    uint key;      // sort key
    uint value;    // payload carried along with the key
} KeyValuePair;
#endif
// Radix sort entry point, declared with C linkage.
//   pData0   - input and output array of key/value pairs
//   pData1   - additional array used for ping-pong computation
//   elements - number of elements to sort
//   bits     - presumably the number of key bits to sort on - confirm at the
//              definition
extern "C" {
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits);
}
#endif // #ifndef _RADIXSORT_H_

File diff suppressed because it is too large Load Diff