fix: some files didn't have the svn:eol-style native property set yet
@@ -1,44 +1,44 @@
/*
  FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
  Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com

  ZLib license
  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/

#ifndef DEF_FLUID
#define DEF_FLUID

#include "vector.h"
#include "common_defs.h"

struct Fluid {
public:
    Vector3DF       pos;        // Basic particle (must match Particle class)
    DWORD           clr;
    int             next;
    Vector3DF       vel;
    Vector3DF       vel_eval;
    unsigned short  age;

    float           pressure;   // Smoothed Particle Hydrodynamics
    float           density;
    Vector3DF       sph_force;
};
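
// Layout note (added for clarity; the byte offsets assume Vector3DF packs three 4-byte
// floats and that the compiler uses the usual 4-byte alignment): pos starts at byte 0,
// so clr falls at 12, next at 16, vel at 20, vel_eval at 32, age at 44, and after two
// padding bytes pressure lands at 48, density at 52 and sph_force at 56. These values
// match the OFFSET_CLR, OFFSET_NEXT, OFFSET_VEL, OFFSET_VEVAL, OFFSET_PRESS,
// OFFSET_DENS and OFFSET_FORCE constants hard-coded in fluid_system_kern.cu.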

#endif /* DEF_FLUID */
File diff suppressed because it is too large
@@ -1,71 +1,71 @@
|
||||
/*
  FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
  Copyright (C) 2009. Rama Hoetzlein, http://www.rchoetzlein.com

  ZLib license
  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/

#include <cutil.h>
#include <cstdlib>
#include <cstdio>
#include <string.h>

#if defined(__APPLE__) || defined(MACOSX)
    #include <GLUT/glut.h>
#else
    #include <GL/glut.h>
#endif
#include <cuda_gl_interop.h>

#include "fluid_system_kern.cu"

extern "C"
{

// Compute number of blocks to create
int iDivUp (int a, int b) {
    return (a % b != 0) ? (a / b + 1) : (a / b);
}
void computeNumBlocks (int numPnts, int minThreads, int &numBlocks, int &numThreads)
{
    numThreads = min( minThreads, numPnts );
    numBlocks = iDivUp ( numPnts, numThreads );
}
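
// Worked example (illustrative numbers, not from this commit): with numPnts = 1000 and a
// 256-thread request, numThreads = min(256, 1000) = 256 and numBlocks = iDivUp(1000, 256) = 4,
// so 4 x 256 = 1024 threads are launched. The 24 surplus thread indices must be ignored by
// the kernels, which is what the "ndx < numPnt" style guards in fluid_system_kern.cu do.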


void Grid_InsertParticlesCUDA ( uchar* data, uint stride, uint numPoints )
{
    int numThreads, numBlocks;
    computeNumBlocks (numPoints, 256, numBlocks, numThreads);

    // transfer point data to device
    char* pntData;
    int size = numPoints * stride;
    cudaMalloc( (void**) &pntData, size);
    cudaMemcpy( pntData, data, size, cudaMemcpyHostToDevice);

    // execute the kernel
    insertParticles<<< numBlocks, numThreads >>> ( pntData, stride );

    // transfer data back to host
    cudaMemcpy( data, pntData, size, cudaMemcpyDeviceToHost);

    // check if kernel invocation generated an error
    CUT_CHECK_ERROR("Kernel execution failed");
    CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos));
}
@@ -1,106 +1,106 @@
|
||||
/*
  FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
  Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com

  ZLib license
  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/


#ifndef DEF_FLUID_SYS
#define DEF_FLUID_SYS

#include <iostream>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "point_set.h"
#include "fluid.h"

// Scalar params
#define SPH_SIMSIZE         4
#define SPH_SIMSCALE        5
#define SPH_VISC            6
#define SPH_RESTDENSITY     7
#define SPH_PMASS           8
#define SPH_PRADIUS         9
#define SPH_PDIST           10
#define SPH_SMOOTHRADIUS    11
#define SPH_INTSTIFF        12
#define SPH_EXTSTIFF        13
#define SPH_EXTDAMP         14
#define SPH_LIMIT           15
#define BOUND_ZMIN_SLOPE    16
#define FORCE_XMAX_SIN      17
#define FORCE_XMIN_SIN      18
#define MAX_FRAC            19
#define CLR_MODE            20

// Vector params
#define SPH_VOLMIN          7
#define SPH_VOLMAX          8
#define SPH_INITMIN         9
#define SPH_INITMAX         10

// Toggles
#define SPH_GRID            0
#define SPH_DEBUG           1
#define WRAP_X              2
#define WALL_BARRIER        3
#define LEVY_BARRIER        4
#define DRAIN_BARRIER       5
#define USE_CUDA            6

#define MAX_PARAM           21
#define BFLUID              2

class FluidSystem : public PointSet {
public:
    FluidSystem ();

    // Basic Particle System
    virtual void Initialize ( int mode, int nmax );
    virtual void Reset ( int nmax );
    virtual void Run ();
    virtual void Advance ();
    virtual int AddPoint ();
    virtual int AddPointReuse ();
    Fluid* AddFluid ()      { return (Fluid*) GetElem(0, AddPointReuse()); }
    Fluid* GetFluid (int n) { return (Fluid*) GetElem(0, n); }

    // Smoothed Particle Hydrodynamics
    void SPH_Setup ();
    void SPH_CreateExample ( int n, int nmax );
    void SPH_DrawDomain ();
    void SPH_ComputeKernels ();

    void SPH_ComputePressureSlow ();    // O(n^2)
    void SPH_ComputePressureGrid ();    // O(kn) - spatial grid

    void SPH_ComputeForceSlow ();       // O(n^2)
    void SPH_ComputeForceGrid ();       // O(kn) - spatial grid
    void SPH_ComputeForceGridNC ();     // O(cn) - neighbor table

private:

    // Smoothed Particle Hydrodynamics
    double m_R2, m_Poly6Kern, m_LapKern, m_SpikyKern;   // Kernel functions
};
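
// Illustrative usage (a sketch added for clarity, not part of this commit; it calls only
// the methods declared above, and the mode constant, particle counts and frame count are
// assumptions):
//
//   FluidSystem psys;
//   psys.Initialize ( BFLUID, 65536 );       // particle mode, max particles
//   psys.SPH_CreateExample ( 0, 65536 );     // build a demo scene
//   for (int frame = 0; frame < 1000; frame++)
//       psys.Run ();                         // one simulation step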

#endif
@@ -1,250 +1,250 @@
|
||||
/*
  FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
  Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com

  ZLib license
  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/


//#include "C:\CUDA\common\inc\cutil.h"             // cutil32.lib
#include <string.h>
#include "../CUDA/btCudaDefines.h"


#if defined(__APPLE__) || defined(MACOSX)
    #include <GLUT/glut.h>
#else
    #include <GL/glut.h>
#endif
#include <cuda_gl_interop.h>

#include "radixsort.cu"
#include "fluid_system_kern.cu"                     // build kernel

FluidParams fcuda;

__device__ char*    bufPnts;                        // point data (array of Fluid structs)
__device__ char*    bufPntSort;                     // point data (array of Fluid structs)
__device__ uint*    bufHash[2];                     // point grid hash
__device__ int*     bufGrid;


extern "C"
{
// Initialize CUDA
void cudaInit(int argc, char **argv)
{
    //CUT_DEVICE_INIT(argc, argv);

    cudaDeviceProp p;
    cudaGetDeviceProperties ( &p, 0);

    printf ( "-- CUDA --\n" );
    printf ( "Name: %s\n", p.name );
    printf ( "Revision: %d.%d\n", p.major, p.minor );
    printf ( "Global Mem: %d\n", p.totalGlobalMem );
    printf ( "Shared/Blk: %d\n", p.sharedMemPerBlock );
    printf ( "Regs/Blk: %d\n", p.regsPerBlock );
    printf ( "Warp Size: %d\n", p.warpSize );
    printf ( "Mem Pitch: %d\n", p.memPitch );
    printf ( "Thrds/Blk: %d\n", p.maxThreadsPerBlock );
    printf ( "Const Mem: %d\n", p.totalConstMem );
    printf ( "Clock Rate: %d\n", p.clockRate );

    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPnts, 10 ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPntSort, 10 ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufHash, 10 ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufGrid, 10 ) );
};

// Compute number of blocks to create
int iDivUp (int a, int b) {
    return (a % b != 0) ? (a / b + 1) : (a / b);
}
void computeNumBlocks (int numPnts, int maxThreads, int &numBlocks, int &numThreads)
{
    numThreads = min( maxThreads, numPnts );
    numBlocks = iDivUp ( numPnts, numThreads );
}

void FluidClearCUDA ()
{
    BT_GPU_SAFE_CALL ( cudaFree ( bufPnts ) );
    BT_GPU_SAFE_CALL ( cudaFree ( bufPntSort ) );
    BT_GPU_SAFE_CALL ( cudaFree ( bufHash[0] ) );
    BT_GPU_SAFE_CALL ( cudaFree ( bufHash[1] ) );
    BT_GPU_SAFE_CALL ( cudaFree ( bufGrid ) );
}


void FluidSetupCUDA ( int num, int stride, float3 min, float3 max, float3 res, float3 size, int chk )
{
    fcuda.min  = make_float3(min.x, min.y, min.z);
    fcuda.max  = make_float3(max.x, max.y, max.z);
    fcuda.res  = make_float3(res.x, res.y, res.z);
    fcuda.size = make_float3(size.x, size.y, size.z);
    fcuda.pnts = num;
    fcuda.delta.x = res.x / size.x;
    fcuda.delta.y = res.y / size.y;
    fcuda.delta.z = res.z / size.z;
    fcuda.cells = res.x*res.y*res.z;
    fcuda.chk = chk;

    computeNumBlocks ( fcuda.pnts, 256, fcuda.numBlocks, fcuda.numThreads);     // particles
    computeNumBlocks ( fcuda.cells, 256, fcuda.gridBlocks, fcuda.gridThreads);  // grid cells

    fcuda.szPnts = (fcuda.numBlocks * fcuda.numThreads) * stride;
    fcuda.szHash = (fcuda.numBlocks * fcuda.numThreads) * sizeof(uint2);        // <cell, particle> pairs
    fcuda.szGrid = (fcuda.gridBlocks * fcuda.gridThreads) * sizeof(uint);
    fcuda.stride = stride;
    printf ( "pnts: %d, t:%dx%d=%d, bufPnts:%d, bufHash:%d\n", fcuda.pnts, fcuda.numBlocks, fcuda.numThreads, fcuda.numBlocks*fcuda.numThreads, fcuda.szPnts, fcuda.szHash );
    printf ( "grds: %d, t:%dx%d=%d, bufGrid:%d, Res: %dx%dx%d\n", fcuda.cells, fcuda.gridBlocks, fcuda.gridThreads, fcuda.gridBlocks*fcuda.gridThreads, fcuda.szGrid, (int) fcuda.res.x, (int) fcuda.res.y, (int) fcuda.res.z );

    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPnts, fcuda.szPnts ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufPntSort, fcuda.szPnts ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufHash[0], fcuda.szHash ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufHash[1], fcuda.szHash ) );
    BT_GPU_SAFE_CALL ( cudaMalloc ( (void**) &bufGrid, fcuda.szGrid ) );

    printf ( "POINTERS\n");
    printf ( "bufPnts: %p\n", bufPnts );
    printf ( "bufPntSort: %p\n", bufPntSort );
    printf ( "bufHash0: %p\n", bufHash[0] );
    printf ( "bufHash1: %p\n", bufHash[1] );
    printf ( "bufGrid: %p\n", bufGrid );

    BT_GPU_SAFE_CALL ( cudaMemcpyToSymbol ( simData, &fcuda, sizeof(FluidParams) ) );
    cudaThreadSynchronize ();
}

void FluidParamCUDA ( float sim_scale, float smooth_rad, float mass, float rest, float stiff, float visc )
{
    fcuda.sim_scale  = sim_scale;
    fcuda.smooth_rad = smooth_rad;
    fcuda.r2         = smooth_rad * smooth_rad;
    fcuda.pmass      = mass;
    fcuda.rest_dens  = rest;
    fcuda.stiffness  = stiff;
    fcuda.visc       = visc;

    fcuda.pdist     = pow ( fcuda.pmass / fcuda.rest_dens, 1/3.0f );
    fcuda.poly6kern = 315.0f / (64.0f * 3.141592 * pow( smooth_rad, 9.0f) );
    fcuda.spikykern = -45.0f / (3.141592 * pow( smooth_rad, 6.0f) );
    fcuda.lapkern   = 45.0f / (3.141592 * pow( smooth_rad, 6.0f) );

    BT_GPU_SAFE_CALL( cudaMemcpyToSymbol ( simData, &fcuda, sizeof(FluidParams) ) );
    cudaThreadSynchronize ();
}
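
/*
   For reference (added for clarity): the three coefficients set above are the
   normalization constants of the poly6 / spiky / viscosity smoothing kernels commonly
   attributed to Muller et al. (2003), precomputed once here so the device kernels in
   fluid_system_kern.cu only multiply by them:

     W_poly6(r,h)            = 315 / (64 pi h^9) * (h^2 - r^2)^3     for 0 <= r <= h
     grad W_spiky(r,h)       = -45 / (pi h^6)    * (h - r)^2 * r_hat
     laplacian W_visc(r,h)   =  45 / (pi h^6)    * (h - r)

   with h = smooth_rad, and pdist = (pmass / rest_dens)^(1/3) the rest spacing
   between particles.
*/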

void TransferToCUDA ( char* data, int* grid, int numPoints )
{
    BT_GPU_SAFE_CALL( cudaMemcpy ( bufPnts, data, numPoints * fcuda.stride, cudaMemcpyHostToDevice ) );
    cudaThreadSynchronize ();
}

void TransferFromCUDA ( char* data, int* grid, int numPoints )
{
    BT_GPU_SAFE_CALL( cudaMemcpy ( data, bufPntSort, numPoints * fcuda.stride, cudaMemcpyDeviceToHost ) );
    cudaThreadSynchronize ();

    BT_GPU_SAFE_CALL( cudaMemcpy ( grid, bufGrid, fcuda.cells * sizeof(uint), cudaMemcpyDeviceToHost ) );
}

void Grid_InsertParticlesCUDA ()
{
    BT_GPU_SAFE_CALL( cudaMemset ( bufHash[0], 0, fcuda.szHash ) );

    hashParticles<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPnts, (uint2*) bufHash[0], fcuda.pnts );
    BT_GPU_CHECK_ERROR( "Kernel execution failed");
    cudaThreadSynchronize ();

    //int buf[20000];
    /*printf ( "HASH: %d (%d)\n", fcuda.pnts, fcuda.numBlocks*fcuda.numThreads );
    BT_GPU_SAFE_CALL( cudaMemcpy ( buf, bufHash[0], fcuda.pnts * 2*sizeof(uint), cudaMemcpyDeviceToHost ) );
    //for (int n=0; n < fcuda.numBlocks*fcuda.numThreads; n++) {
    for (int n=0; n < 100; n++) {
        printf ( "%d: <%d,%d>\n", n, buf[n*2], buf[n*2+1] );
    }*/

    RadixSort( (KeyValuePair *) bufHash[0], (KeyValuePair *) bufHash[1], fcuda.pnts, 32);
    BT_GPU_CHECK_ERROR( "Kernel execution failed");
    cudaThreadSynchronize ();

    /*printf ( "HASH: %d (%d)\n", fcuda.pnts, fcuda.numBlocks*fcuda.numThreads );
    BT_GPU_SAFE_CALL( cudaMemcpy ( buf, bufHash[0], fcuda.pnts * 2*sizeof(uint), cudaMemcpyDeviceToHost ) );
    //for (int n=0; n < fcuda.numBlocks*fcuda.numThreads; n++) {
    for (int n=0; n < 100; n++) {
        printf ( "%d: <%d,%d>\n", n, buf[n*2], buf[n*2+1] );
    }*/

    // insertParticles<<< fcuda.gridBlocks, fcuda.gridThreads>>> ( bufPnts, (uint2*) bufHash[0], bufGrid, fcuda.pnts, fcuda.cells );

    BT_GPU_SAFE_CALL( cudaMemset ( bufGrid, NULL_HASH, fcuda.cells * sizeof(uint) ) );

    insertParticlesRadix<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPnts, (uint2*) bufHash[0], bufGrid, bufPntSort, fcuda.pnts, fcuda.cells );
    BT_GPU_CHECK_ERROR( "Kernel execution failed");
    cudaThreadSynchronize ();

    /*printf ( "GRID: %d\n", fcuda.cells );
    BT_GPU_SAFE_CALL( cudaMemcpy ( buf, bufGrid, fcuda.cells * sizeof(uint), cudaMemcpyDeviceToHost ) );
    for (int n=0; n < 100; n++) {
        printf ( "%d: %d\n", n, buf[n]);
    }*/
}

void SPH_ComputePressureCUDA ()
{
    computePressure<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPntSort, bufGrid, (uint2*) bufHash[0], fcuda.pnts );
    BT_GPU_CHECK_ERROR( "Kernel execution failed");
    cudaThreadSynchronize ();
}

void SPH_ComputeForceCUDA ()
{
    //-- standard force
    //computeForce<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPntSort, bufGrid, (uint2*) bufHash[0], fcuda.pnts );

    // Force using neighbor table
    computeForceNbr<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPntSort, fcuda.pnts );
    BT_GPU_CHECK_ERROR( "Kernel execution failed");
    cudaThreadSynchronize ();
}

void SPH_AdvanceCUDA ( float dt, float ss )
{
    advanceParticles<<< fcuda.numBlocks, fcuda.numThreads>>> ( bufPntSort, fcuda.pnts, dt, ss );
    BT_GPU_CHECK_ERROR( "Kernel execution failed");
    cudaThreadSynchronize ();
}

} // extern C




//----------- Per frame: Malloc/Free, Host<->Device
// transfer point data to device
/*char* pntData;
int size = (fcuda.numBlocks*fcuda.numThreads) * stride;
cudaMalloc( (void**) &pntData, size);
cudaMemcpy( pntData, data, numPoints*stride, cudaMemcpyHostToDevice);
insertParticles<<< fcuda.numBlocks, fcuda.numThreads >>> ( pntData, stride, numPoints );
cudaMemcpy( data, pntData, numPoints*stride, cudaMemcpyDeviceToHost);
cudaFree( pntData );*/
@@ -1,63 +1,63 @@
|
||||
/*
  FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
  Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com

  ZLib license
  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/


#include <vector_types.h>
#include <driver_types.h>               // for cudaStream_t

typedef unsigned int    uint;           // should be 4 bytes on CUDA
typedef unsigned char   uchar;          // should be 1 byte on CUDA

struct FluidParams {
    int     numThreads, numBlocks;
    int     gridThreads, gridBlocks;
    int     szPnts, szHash, szGrid;
    int     stride, pnts, cells;
    int     chk;
    float   smooth_rad, r2, sim_scale, visc;
    float3  min, max, res, size, delta;

    float   pdist, pmass, rest_dens, stiffness;
    float   poly6kern, spikykern, lapkern;

};

extern "C"
{

    void cudaInit(int argc, char **argv);

    void FluidClearCUDA ();
    void FluidSetupCUDA ( int num, int stride, float3 min, float3 max, float3 res, float3 size, int chk );
    void FluidParamCUDA ( float sim_scale, float smooth_rad, float mass, float rest, float stiff, float visc );

    void TransferToCUDA ( char* data, int* grid, int numPoints );
    void TransferFromCUDA ( char* data, int* grid, int numPoints );

    void Grid_InsertParticlesCUDA ();
    void SPH_ComputePressureCUDA ();
    void SPH_ComputeForceCUDA ();
    void SPH_AdvanceCUDA ( float dt, float ss );

}
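
// Illustrative per-frame driver (a sketch added for clarity, not part of this commit;
// only the functions declared above are used, while the buffer names and the numeric
// parameter values are assumptions):
//
//   FluidSetupCUDA ( numPoints, stride, gmin, gmax, gres, gsize, 1 );
//   FluidParamCUDA ( simScale, smoothRadius, particleMass, restDensity, stiffness, viscosity );
//   while ( running ) {
//       TransferToCUDA ( (char*) particles, gridCells, numPoints );
//       Grid_InsertParticlesCUDA ();             // hash, radix sort, build cell starts
//       SPH_ComputePressureCUDA ();              // per-particle density and pressure
//       SPH_ComputeForceCUDA ();                 // pressure + viscosity forces
//       SPH_AdvanceCUDA ( timeStep, simScale );  // leapfrog integration
//       TransferFromCUDA ( (char*) particles, gridCells, numPoints );
//   }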
@@ -1,402 +1,402 @@
|
||||
/*
  FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
  Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com

  ZLib license
  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/



#ifndef _PARTICLES_KERNEL_H_
#define _PARTICLES_KERNEL_H_

#include <stdio.h>
#include <math.h>

#include "fluid_system_host.cuh"

#define TOTAL_THREADS   65536
#define BLOCK_THREADS   256
#define MAX_NBR         80

__constant__ FluidParams    simData;                // simulation data (on device)

__device__ int      bufNeighbor[ TOTAL_THREADS*MAX_NBR ];
__device__ float    bufNdist[ TOTAL_THREADS*MAX_NBR ];

#define COLOR(r,g,b)    ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) )
#define COLORA(r,g,b,a) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) | uint((a)*255.0f) )

#define NULL_HASH       333333

#define OFFSET_CLR      12
#define OFFSET_NEXT     16
#define OFFSET_VEL      20
#define OFFSET_VEVAL    32
#define OFFSET_PRESS    48
#define OFFSET_DENS     52
#define OFFSET_FORCE    56


__global__ void hashParticles ( char* bufPnts, uint2* bufHash, int numPnt )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;       // particle index
    float3* pos = (float3*) (bufPnts + __mul24(ndx, simData.stride) );
    int gz = (pos->z - simData.min.z) * simData.delta.z ;
    int gy = (pos->y - simData.min.y) * simData.delta.y ;
    int gx = (pos->x - simData.min.x) * simData.delta.x ;
    if ( ndx >= numPnt || gx < 0 || gx > simData.res.x-1 || gy < 0 || gy > simData.res.y-1 || gz < 0 || gz > simData.res.z-1 )
        bufHash[ndx] = make_uint2( NULL_HASH, ndx );
    else
        bufHash[ndx] = make_uint2( __mul24(__mul24(gz, (int) simData.res.y)+gy, (int) simData.res.x) + gx, ndx );

    __syncthreads ();
}
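
// Worked example (illustrative numbers): with res = (8,8,8), a particle landing in grid
// cell (gx,gy,gz) = (3,2,1) hashes to (1*8 + 2)*8 + 3 = 83, so bufHash[ndx] holds the
// pair <cell 83, particle ndx>. Out-of-domain particles and surplus threads store
// <NULL_HASH, ndx>; since NULL_HASH (333333) is larger than any valid cell index for a
// grid of this size, those entries sort to the end of the key range after the radix sort.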

__global__ void insertParticles ( char* bufPnts, uint2* bufHash, int* bufGrid, int numPnt, int numGrid )
{
    uint grid_ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;  // grid cell index

    bufPnts += OFFSET_NEXT;
    bufGrid[grid_ndx] = -1;
    for (int n=0; n < numPnt; n++) {
        if ( bufHash[n].x == grid_ndx ) {
            *(int*) (bufPnts + __mul24(bufHash[n].y, simData.stride)) = bufGrid[grid_ndx];
            bufGrid[grid_ndx] = bufHash[n].y;
        }
    }
    __syncthreads ();
}

__global__ void insertParticlesRadix ( char* bufPnts, uint2* bufHash, int* bufGrid, char* bufPntSort, int numPnt, int numGrid )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;       // particle index

    uint2 bufHashSort = bufHash[ndx];

    __shared__ uint sharedHash[257];
    sharedHash[threadIdx.x+1] = bufHashSort.x;
    if ( ndx > 0 && threadIdx.x == 0 ) {
        volatile uint2 prevData = bufHash[ndx-1];
        sharedHash[0] = prevData.x;
    }
    __syncthreads ();

    if ( (ndx == 0 || bufHashSort.x != sharedHash[threadIdx.x]) && bufHashSort.x != NULL_HASH ) {
        bufGrid [ bufHashSort.x ] = ndx;
    }
    if ( ndx < numPnt ) {
        char* src  = bufPnts + __mul24( bufHashSort.y, simData.stride );
        char* dest = bufPntSort + __mul24( ndx, simData.stride );

        *(float3*)(dest)                = *(float3*)(src);
        *(uint*)  (dest + OFFSET_CLR)   = *(uint*)  (src + OFFSET_CLR);
        *(float3*)(dest + OFFSET_VEL)   = *(float3*)(src + OFFSET_VEL);
        *(float3*)(dest + OFFSET_VEVAL) = *(float3*)(src + OFFSET_VEVAL);

        *(float*) (dest + OFFSET_DENS)  = 0.0;
        *(float*) (dest + OFFSET_PRESS) = 0.0;
        *(float3*)(dest + OFFSET_FORCE) = make_float3(0,0,0);
        *(int*)   (dest + OFFSET_NEXT)  = bufHashSort.x;
    }

    __syncthreads ();

}
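
// Example of the cell-start detection above (illustrative): if the sorted hash keys are
// [4, 4, 7, 7, 7, 9, ...], thread 0 writes bufGrid[4] = 0, thread 2 (key 7 differs from
// its left neighbour's key 4 held in sharedHash) writes bufGrid[7] = 2, and thread 5
// writes bufGrid[9] = 5; every other thread sees the same key as its predecessor and
// writes nothing, so bufGrid[c] ends up holding the index of the first sorted particle
// in cell c while the particle record is copied into bufPntSort in sorted order.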

//__shared__ int ncount [ BLOCK_THREADS ];

__device__ float contributePressure ( int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
{
    float3* qpos;
    float3 dist;
    float dsq, c, sum;
    float d = simData.sim_scale;
    int nbr = __mul24(pndx, MAX_NBR);

    sum = 0.0;
    for ( ; qndx < simData.pnts; qndx++ ) {

        if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;

        if ( qndx != pndx ) {
            qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));

            dist.x = ( p->x - qpos->x )*d;          // dist in cm
            dist.y = ( p->y - qpos->y )*d;
            dist.z = ( p->z - qpos->z )*d;
            dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
            if ( dsq < simData.r2 ) {
                c = simData.r2 - dsq;
                sum += c * c * c;
                if ( bufNeighbor[nbr] < MAX_NBR ) {
                    bufNeighbor[ nbr+bufNeighbor[nbr] ] = qndx;
                    bufNdist[ nbr+bufNeighbor[nbr] ] = sqrt(dsq);
                    bufNeighbor[nbr]++;
                }
            }
        }
        //curr = *(int*) (bufPnts + __mul24(curr, simData.stride) + OFFSET_NEXT);
    }
    return sum;
}

/*if ( ncount[threadIdx.x] < MAX_NBR ) {
    bufNeighbor [ nbr + ncount[threadIdx.x] ] = curr;
    bufNdist [ nbr + ncount[threadIdx.x] ] = sqrt(dsq);
    ncount[threadIdx.x]++;
}*/

__global__ void computePressure ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;       // particle index

    //if ( ndx < 1024 ) {

    float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));

    // Find 2x2x2 grid cells
    // - Use registers only, no arrays (local-memory too slow)
    int3 cell;
    int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
    float gs = simData.smooth_rad / simData.sim_scale;

    cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
    cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
    cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
    gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
    gc1 = gc0 + 1;
    gc2 = gc0 + simData.res.x;
    gc3 = gc2 + 1;
    if ( cell.z+1 < simData.res.z ) {
        gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
        gc5 = gc4 + 1;
        gc6 = gc4 + simData.res.x;
        gc7 = gc6 + 1;
    } else {
        gc4 = -1; gc5 = -1;                 // upper z-layer falls outside the grid
        gc6 = -1; gc7 = -1;
    }
    if ( cell.x+1 >= simData.res.x ) {
        gc1 = -1; gc3 = -1;
        gc5 = -1; gc7 = -1;
    }
    if ( cell.y+1 >= simData.res.y ) {
        gc2 = -1; gc3 = -1;
        gc6 = -1; gc7 = -1;
    }
    // Sum Pressure
    float sum = 0.0;
    bufNeighbor[ __mul24(ndx, MAX_NBR) ] = 1;
    if (gc0 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
    if (gc1 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
    if (gc2 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
    if (gc3 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
    if (gc4 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
    if (gc5 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
    if (gc6 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
    if (gc7 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );

    // Compute Density & Pressure
    sum = sum * simData.pmass * simData.poly6kern;
    if ( sum == 0.0 ) sum = 1.0;
    *(float*) ((char*)pos + OFFSET_PRESS) = ( sum - simData.rest_dens ) * simData.stiffness;
    *(float*) ((char*)pos + OFFSET_DENS)  = 1.0f / sum;

    //}
    //__syncthreads ();
}
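
/*
   What computePressure accumulates, written out (added for clarity; distances are the
   sim_scale-scaled particle separations):

     density_i  = pmass * poly6kern * sum_j ( r2 - |x_i - x_j|^2 )^3     over neighbours with |x_i - x_j|^2 < r2
     pressure_i = ( density_i - rest_dens ) * stiffness                  (ideal-gas style equation of state)

   The reciprocal 1/density_i is what gets stored at OFFSET_DENS, because the force
   kernels below only ever divide by the densities; pressure_i is stored at OFFSET_PRESS.
*/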

__device__ void contributeForce ( float3& force, int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
{
    float press  = *(float*) ((char*)p + OFFSET_PRESS);
    float dens   = *(float*) ((char*)p + OFFSET_DENS);
    float3 veval = *(float3*) ((char*)p + OFFSET_VEVAL );
    float3 qeval, dist;
    float c, ndistj, dsq;
    float pterm, dterm, vterm;
    float3* qpos;
    float d = simData.sim_scale;

    vterm = simData.lapkern * simData.visc;

    for ( ; qndx < simData.pnts; qndx++ ) {

        if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH) break;

        if ( qndx != pndx ) {
            qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));

            dist.x = ( p->x - qpos->x )*d;          // dist in cm
            dist.y = ( p->y - qpos->y )*d;
            dist.z = ( p->z - qpos->z )*d;
            dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
            if ( dsq < simData.r2 ) {
                ndistj = sqrt(dsq);
                c = ( simData.smooth_rad - ndistj );
                dist.x = ( p->x - qpos->x )*d;      // dist in cm
                dist.y = ( p->y - qpos->y )*d;
                dist.z = ( p->z - qpos->z )*d;
                pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
                dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
                qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
                force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
                force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
                force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
            }
        }
    }
}
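
/*
   The per-pair contribution accumulated above, written out (added for clarity; this is
   the symmetrized pressure-gradient plus viscosity form that the spiky and viscosity
   coefficients correspond to, with h = smooth_rad, r_ij the scaled distance, and p, rho
   taken from computePressure):

     f_i += (h - r_ij) / (rho_i * rho_j) *
            [ -0.5 * spikykern * (p_i + p_j) * (h - r_ij) * (x_i - x_j) / r_ij
              + lapkern * visc * (v_j - v_i) ]

   grouped in the code as pterm (pressure), vterm (viscosity) and dterm (the shared
   (h - r_ij)/(rho_i rho_j) factor, using the stored reciprocal densities).
*/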


__global__ void computeForce ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;       // particle index

    //if ( ndx < numPnt ) {

    float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));

    // Find 2x2x2 grid cells
    // - Use registers only, no arrays (local-memory too slow)
    int3 cell;
    int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
    float gs = simData.smooth_rad / simData.sim_scale;

    cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
    cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
    cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
    gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
    gc1 = gc0 + 1;
    gc2 = gc0 + simData.res.x;
    gc3 = gc2 + 1;
    if ( cell.z+1 < simData.res.z ) {
        gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
        gc5 = gc4 + 1;
        gc6 = gc4 + simData.res.x;
        gc7 = gc6 + 1;
    } else {
        gc4 = -1; gc5 = -1;                 // upper z-layer falls outside the grid
        gc6 = -1; gc7 = -1;
    }
    if ( cell.x+1 >= simData.res.x ) {
        gc1 = -1; gc3 = -1;
        gc5 = -1; gc7 = -1;
    }
    if ( cell.y+1 >= simData.res.y ) {
        gc2 = -1; gc3 = -1;
        gc6 = -1; gc7 = -1;
    }
    // Sum Forces
    float3 force = make_float3(0,0,0);
    if (gc0 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
    if (gc1 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
    if (gc2 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
    if (gc3 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
    if (gc4 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
    if (gc5 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
    if (gc6 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
    if (gc7 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );

    // Update Force
    *(float3*) ((char*)pos + OFFSET_FORCE ) = force;

    //}
    //__syncthreads ();
}


__global__ void computeForceNbr ( char* bufPntSort, int numPnt )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;       // particle index

    if ( ndx < numPnt ) {

        float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));

        float3* qpos;
        float press  = *(float*) ((char*)pos + OFFSET_PRESS);
        float dens   = *(float*) ((char*)pos + OFFSET_DENS);
        float3 veval = *(float3*) ((char*)pos + OFFSET_VEVAL );
        float3 qeval, dist, force;
        float d = simData.sim_scale;
        float c, ndistj;
        float pterm, dterm, vterm;
        vterm = simData.lapkern * simData.visc;
        int nbr = __mul24(ndx, MAX_NBR);

        int ncnt = bufNeighbor[ nbr ];

        force = make_float3(0,0,0);
        for (int j=1; j < ncnt; j++) {              // base 1, n[0] = count
            ndistj = bufNdist[ nbr+j ];
            qpos = (float3*) (bufPntSort + __mul24( bufNeighbor[ nbr+j ], simData.stride) );
            c = ( simData.smooth_rad - ndistj );
            dist.x = ( pos->x - qpos->x )*d;        // dist in cm
            dist.y = ( pos->y - qpos->y )*d;
            dist.z = ( pos->z - qpos->z )*d;
            pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
            dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
            qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
            force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
            force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
            force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
        }
        *(float3*) ((char*)pos + OFFSET_FORCE ) = force;

    }

}

__global__ void advanceParticles ( char* bufPntSort, int numPnt, float dt, float ss )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;       // particle index

    if ( ndx < numPnt ) {

        // Get particle vars
        float3* pos      = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
        float3* vel      = (float3*) ((char*)pos + OFFSET_VEL );
        float3* vel_eval = (float3*) ((char*)pos + OFFSET_VEVAL );
        float3  accel    = *(float3*) ((char*)pos + OFFSET_FORCE );
        float3  vcurr, vnext;

        // Leapfrog integration
        accel.x *= 0.00020543;                  // NOTE - To do: SPH_PMASS should be passed in
        accel.y *= 0.00020543;
        accel.z *= 0.00020543;
        accel.z -= 9.8;

        vcurr = *vel;
        vnext.x = accel.x*dt + vcurr.x;
        vnext.y = accel.y*dt + vcurr.y;
        vnext.z = accel.z*dt + vcurr.z;         // v(t+1/2) = v(t-1/2) + a(t) dt

        accel.x = (vcurr.x + vnext.x) * 0.5;    // v(t+1) = [v(t-1/2) + v(t+1/2)] * 0.5, used to compute forces later
        accel.y = (vcurr.y + vnext.y) * 0.5;
        accel.z = (vcurr.z + vnext.z) * 0.5;

        *vel_eval = accel;
        *vel = vnext;

        dt /= simData.sim_scale;
        vnext.x = pos->x + vnext.x*dt;
        vnext.y = pos->y + vnext.y*dt;
        vnext.z = pos->z + vnext.z*dt;
        *pos = vnext;                           // p(t+1) = p(t) + v(t+1/2) dt
    }

    __syncthreads ();
}
|
||||
|
||||
#endif
|
||||
/*
  FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
  Copyright (C) 2008. Rama Hoetzlein, http://www.rchoetzlein.com

  ZLib license
  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/

#ifndef _PARTICLES_KERNEL_H_
#define _PARTICLES_KERNEL_H_
#include <stdio.h>
#include <math.h>

#include "fluid_system_host.cuh"

#define TOTAL_THREADS   65536
#define BLOCK_THREADS   256
#define MAX_NBR         80

__constant__ FluidParams simData;           // simulation data (on device)

__device__ int   bufNeighbor[ TOTAL_THREADS*MAX_NBR ];
__device__ float bufNdist[ TOTAL_THREADS*MAX_NBR ];

#define COLOR(r,g,b)    ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) )
#define COLORA(r,g,b,a) ( (uint((r)*255.0f)<<24) | (uint((g)*255.0f)<<16) | (uint((b)*255.0f)<<8) | uint((a)*255.0f) )

#define NULL_HASH       333333

#define OFFSET_CLR      12
#define OFFSET_NEXT     16
#define OFFSET_VEL      20
#define OFFSET_VEVAL    32
#define OFFSET_PRESS    48
#define OFFSET_DENS     52
#define OFFSET_FORCE    56
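For orientation, the OFFSET_* constants above give the byte layout of one particle record inside bufPnts/bufPntSort (the record size itself comes from simData.stride, and the kernels apply the offsets by hand). The struct below is only a sketch to make those offsets concrete; its name and field names are assumptions, not part of the original source.

// Illustrative only: the per-particle record implied by the OFFSET_* values above.
// Byte offsets are taken from the defines; names are assumptions.
struct ParticleRecordSketch {
    float3       pos;        // byte  0  - read as float3* at the record start
    unsigned int clr;        // byte 12  - OFFSET_CLR
    int          next;       // byte 16  - OFFSET_NEXT (cell link / sorted cell id)
    float3       vel;        // byte 20  - OFFSET_VEL
    float3       vel_eval;   // byte 32  - OFFSET_VEVAL
    unsigned int pad;        // byte 44  - unused by these kernels
    float        pressure;   // byte 48  - OFFSET_PRESS
    float        density;    // byte 52  - OFFSET_DENS (stored as 1/density, see computePressure)
    float3       sph_force;  // byte 56  - OFFSET_FORCE
};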
__global__ void hashParticles ( char* bufPnts, uint2* bufHash, int numPnt )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;        // particle index
    float3* pos = (float3*) (bufPnts + __mul24(ndx, simData.stride) );
    int gz = (pos->z - simData.min.z) * simData.delta.z;
    int gy = (pos->y - simData.min.y) * simData.delta.y;
    int gx = (pos->x - simData.min.x) * simData.delta.x;

    // Particles outside the grid (and threads beyond numPnt) get the sentinel hash,
    // so they sort to the end of the key list.
    if ( ndx >= numPnt || gx < 0 || gx > simData.res.x-1 || gy < 0 || gy > simData.res.y-1 || gz < 0 || gz > simData.res.z-1 )
        bufHash[ndx] = make_uint2( NULL_HASH, ndx );
    else
        bufHash[ndx] = make_uint2( __mul24(__mul24(gz, (int) simData.res.y)+gy, (int) simData.res.x) + gx, ndx );

    __syncthreads ();
}
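The cell id written above is a row-major flattening of the integer cell coordinates, with x varying fastest. A small host-side sketch of the same mapping follows; the parameter names are illustrative and simply mirror simData.min, simData.delta and simData.res.

// Host-side sketch of the cell-index mapping used in hashParticles (illustrative names).
static inline int cellIndexSketch ( float3 p, float3 gridMin, float3 gridDelta, int3 gridRes )
{
    int gx = (int)((p.x - gridMin.x) * gridDelta.x);
    int gy = (int)((p.y - gridMin.y) * gridDelta.y);
    int gz = (int)((p.z - gridMin.z) * gridDelta.z);
    if (gx < 0 || gx >= gridRes.x || gy < 0 || gy >= gridRes.y || gz < 0 || gz >= gridRes.z)
        return -1;                                   // the kernel uses NULL_HASH for this case
    return (gz * gridRes.y + gy) * gridRes.x + gx;   // row-major: x fastest, then y, then z
}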
__global__ void insertParticles ( char* bufPnts, uint2* bufHash, int* bufGrid, int numPnt, int numGrid )
{
    uint grid_ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;   // grid cell index

    bufPnts += OFFSET_NEXT;
    bufGrid[grid_ndx] = -1;
    for (int n=0; n < numPnt; n++) {
        if ( bufHash[n].x == grid_ndx ) {
            *(int*) (bufPnts + __mul24(bufHash[n].y, simData.stride)) = bufGrid[grid_ndx];
            bufGrid[grid_ndx] = bufHash[n].y;
        }
    }
    __syncthreads ();
}
__global__ void insertParticlesRadix ( char* bufPnts, uint2* bufHash, int* bufGrid, char* bufPntSort, int numPnt, int numGrid )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;        // particle index

    uint2 bufHashSort = bufHash[ndx];

    __shared__ uint sharedHash[257];
    sharedHash[threadIdx.x+1] = bufHashSort.x;
    if ( ndx > 0 && threadIdx.x == 0 ) {
        volatile uint2 prevData = bufHash[ndx-1];
        sharedHash[0] = prevData.x;
    }
    __syncthreads ();

    if ( (ndx == 0 || bufHashSort.x != sharedHash[threadIdx.x]) && bufHashSort.x != NULL_HASH ) {
        bufGrid [ bufHashSort.x ] = ndx;
    }
    if ( ndx < numPnt ) {
        char* src  = bufPnts    + __mul24( bufHashSort.y, simData.stride );
        char* dest = bufPntSort + __mul24( ndx, simData.stride );

        *(float3*)(dest)                = *(float3*)(src);
        *(uint*)  (dest + OFFSET_CLR)   = *(uint*)  (src + OFFSET_CLR);
        *(float3*)(dest + OFFSET_VEL)   = *(float3*)(src + OFFSET_VEL);
        *(float3*)(dest + OFFSET_VEVAL) = *(float3*)(src + OFFSET_VEVAL);

        *(float*) (dest + OFFSET_DENS)  = 0.0;
        *(float*) (dest + OFFSET_PRESS) = 0.0;
        *(float3*)(dest + OFFSET_FORCE) = make_float3(0,0,0);
        *(int*)   (dest + OFFSET_NEXT)  = bufHashSort.x;
    }

    __syncthreads ();
}

//__shared__ int ncount [ BLOCK_THREADS ];
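Because bufHash is already sorted by cell id at this point, a thread can recognise the start of a cell simply by comparing its own hash with that of the particle to its left; the sharedHash staging avoids a second global read per thread. The same idea on the host, with illustrative names: for sortedCell = {3,3,5,5,5,9} it records starts at indices 0, 2 and 5.

// Host-side sketch of the cell-start detection performed by insertParticlesRadix.
// cellStart must be pre-filled with -1 for cells that receive no particles.
void findCellStartsSketch ( const unsigned int* sortedCell, int numPnt, int* cellStart )
{
    for (int i = 0; i < numPnt; i++) {
        if (i == 0 || sortedCell[i] != sortedCell[i-1])
            cellStart[ sortedCell[i] ] = i;          // first particle of this cell
    }
}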
__device__ float contributePressure ( int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
{
    float3* qpos;
    float3 dist;
    float dsq, c, sum;
    float d = simData.sim_scale;
    int nbr = __mul24(pndx, MAX_NBR);

    sum = 0.0;
    for ( ; qndx < simData.pnts; qndx++ ) {

        if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH ) break;

        if ( qndx != pndx ) {
            qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));

            dist.x = ( p->x - qpos->x )*d;            // dist in cm
            dist.y = ( p->y - qpos->y )*d;
            dist.z = ( p->z - qpos->z )*d;
            dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
            if ( dsq < simData.r2 ) {
                c = simData.r2 - dsq;
                sum += c * c * c;
                if ( bufNeighbor[nbr] < MAX_NBR ) {
                    bufNeighbor[ nbr+bufNeighbor[nbr] ] = qndx;
                    bufNdist[ nbr+bufNeighbor[nbr] ] = sqrt(dsq);
                    bufNeighbor[nbr]++;
                }
            }
        }
        //curr = *(int*) (bufPnts + __mul24(curr, simData.stride) + OFFSET_NEXT);
    }
    return sum;
}

/*if ( ncount[threadIdx.x] < MAX_NBR ) {
    bufNeighbor [ nbr + ncount[threadIdx.x] ] = curr;
    bufNdist [ nbr + ncount[threadIdx.x] ] = sqrt(dsq);
    ncount[threadIdx.x]++;
}*/
__global__ void computePressure ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;        // particle index

    //if ( ndx < 1024 ) {

    float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));

    // Find 2x2x2 grid cells
    // - Use registers only, no arrays (local-memory too slow)
    int3 cell;
    int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
    float gs = simData.smooth_rad / simData.sim_scale;

    cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
    cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
    cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
    gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
    gc1 = gc0 + 1;
    gc2 = gc0 + simData.res.x;
    gc3 = gc2 + 1;
    if ( cell.z+1 < simData.res.z ) {
        gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
        gc5 = gc4 + 1;
        gc6 = gc4 + simData.res.x;
        gc7 = gc6 + 1;
    } else {
        gc4 = -1; gc5 = -1; gc6 = -1; gc7 = -1;      // top z-slice: no cells above
    }
    if ( cell.x+1 >= simData.res.x ) {
        gc1 = -1; gc3 = -1;
        gc5 = -1; gc7 = -1;
    }
    if ( cell.y+1 >= simData.res.y ) {
        gc2 = -1; gc3 = -1;
        gc6 = -1; gc7 = -1;
    }

    // Sum Pressure
    float sum = 0.0;
    bufNeighbor[ __mul24(ndx, MAX_NBR) ] = 1;
    if (gc0 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
    if (gc1 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
    if (gc2 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
    if (gc3 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
    if (gc4 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
    if (gc5 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
    if (gc6 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
    if (gc7 != -1 ) sum += contributePressure ( ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );

    // Compute Density & Pressure
    sum = sum * simData.pmass * simData.poly6kern;
    if ( sum == 0.0 ) sum = 1.0;
    *(float*) ((char*)pos + OFFSET_PRESS) = ( sum - simData.rest_dens ) * simData.stiffness;
    *(float*) ((char*)pos + OFFSET_DENS) = 1.0f / sum;

    //}
    //__syncthreads ();
}
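For reference, the density and pressure written above appear to follow the standard Müller-style SPH formulation with a poly6 smoothing kernel and a linear equation of state, assuming simData.poly6kern holds the usual normalisation, simData.r2 = h^2, m = simData.pmass and k = simData.stiffness:

\[
\rho_i = m \,\frac{315}{64\pi h^{9}} \sum_j \bigl(h^{2} - \lVert \mathbf{r}_i - \mathbf{r}_j \rVert^{2}\bigr)^{3},
\qquad
p_i = k\,(\rho_i - \rho_0)
\]

Note that the kernel stores 1/ρ_i rather than ρ_i in the OFFSET_DENS slot, which saves a division per neighbour in the force loop that follows.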
__device__ void contributeForce ( float3& force, int pndx, float3* p, int qndx, int grid_ndx, char* bufPnts, uint2* bufHash )
{
    float press  = *(float*) ((char*)p + OFFSET_PRESS);
    float dens   = *(float*) ((char*)p + OFFSET_DENS);
    float3 veval = *(float3*) ((char*)p + OFFSET_VEVAL );
    float3 qeval, dist;
    float c, ndistj, dsq;
    float pterm, dterm, vterm;
    float3* qpos;
    float d = simData.sim_scale;

    vterm = simData.lapkern * simData.visc;

    for ( ; qndx < simData.pnts; qndx++ ) {

        if ( bufHash[qndx].x != grid_ndx || qndx == NULL_HASH ) break;

        if ( qndx != pndx ) {
            qpos = (float3*) ( bufPnts + __mul24(qndx, simData.stride ));

            dist.x = ( p->x - qpos->x )*d;            // dist in cm
            dist.y = ( p->y - qpos->y )*d;
            dist.z = ( p->z - qpos->z )*d;
            dsq = (dist.x*dist.x + dist.y*dist.y + dist.z*dist.z);
            if ( dsq < simData.r2 ) {
                ndistj = sqrt(dsq);
                c = ( simData.smooth_rad - ndistj );
                pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
                dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
                qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
                force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
                force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
                force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
            }
        }
    }
}
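The per-neighbour update above appears to combine the usual SPH pressure force (spiky kernel gradient) and viscosity force (viscosity kernel Laplacian). Since OFFSET_DENS holds 1/ρ, the dterm product supplies the 1/(ρ_i ρ_j) factor, and the particle mass is applied later in advanceParticles (see the SPH_PMASS note there). Schematically:

\[
\mathbf{f}_i \;\propto\; \sum_j \left[ -\frac{p_i + p_j}{2}\,\nabla W_{\text{spiky}}(\mathbf{r}_{ij})
\;+\; \mu\,\bigl(\mathbf{v}_j - \mathbf{v}_i\bigr)\,\nabla^{2} W_{\text{visc}}(\mathbf{r}_{ij}) \right]
\frac{1}{\rho_i\,\rho_j}
\]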
__global__ void computeForce ( char* bufPntSort, int* bufGrid, uint2* bufHash, int numPnt )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;        // particle index

    //if ( ndx < numPnt ) {

    float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));

    // Find 2x2x2 grid cells
    // - Use registers only, no arrays (local-memory too slow)
    int3 cell;
    int gc0, gc1, gc2, gc3, gc4, gc5, gc6, gc7;
    float gs = simData.smooth_rad / simData.sim_scale;

    cell.x = max(0, (int)((-gs + pos->x - simData.min.x) * simData.delta.x));
    cell.y = max(0, (int)((-gs + pos->y - simData.min.y) * simData.delta.y));
    cell.z = max(0, (int)((-gs + pos->z - simData.min.z) * simData.delta.z));
    gc0 = __mul24(__mul24(cell.z, simData.res.y) + cell.y, simData.res.x) + cell.x;
    gc1 = gc0 + 1;
    gc2 = gc0 + simData.res.x;
    gc3 = gc2 + 1;
    if ( cell.z+1 < simData.res.z ) {
        gc4 = gc0 + __mul24(simData.res.x, simData.res.y);
        gc5 = gc4 + 1;
        gc6 = gc4 + simData.res.x;
        gc7 = gc6 + 1;
    } else {
        gc4 = -1; gc5 = -1; gc6 = -1; gc7 = -1;      // top z-slice: no cells above
    }
    if ( cell.x+1 >= simData.res.x ) {
        gc1 = -1; gc3 = -1;
        gc5 = -1; gc7 = -1;
    }
    if ( cell.y+1 >= simData.res.y ) {
        gc2 = -1; gc3 = -1;
        gc6 = -1; gc7 = -1;
    }

    // Sum Forces
    float3 force = make_float3(0,0,0);
    if (gc0 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc0], gc0, bufPntSort, bufHash );
    if (gc1 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc1], gc1, bufPntSort, bufHash );
    if (gc2 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc2], gc2, bufPntSort, bufHash );
    if (gc3 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc3], gc3, bufPntSort, bufHash );
    if (gc4 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc4], gc4, bufPntSort, bufHash );
    if (gc5 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc5], gc5, bufPntSort, bufHash );
    if (gc6 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc6], gc6, bufPntSort, bufHash );
    if (gc7 != -1 ) contributeForce ( force, ndx, pos, bufGrid[gc7], gc7, bufPntSort, bufHash );

    // Update Force
    *(float3*) ((char*)pos + OFFSET_FORCE ) = force;

    //}
    //__syncthreads ();
}
__global__ void computeForceNbr ( char* bufPntSort, int numPnt )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;        // particle index

    if ( ndx < numPnt ) {

        float3* pos = (float3*) (bufPntSort + __mul24(ndx, simData.stride));

        float3* qpos;
        float press  = *(float*) ((char*)pos + OFFSET_PRESS);
        float dens   = *(float*) ((char*)pos + OFFSET_DENS);
        float3 veval = *(float3*) ((char*)pos + OFFSET_VEVAL );
        float3 qeval, dist, force;
        float d = simData.sim_scale;
        float c, ndistj;
        float pterm, dterm, vterm;
        vterm = simData.lapkern * simData.visc;
        int nbr = __mul24(ndx, MAX_NBR);

        int ncnt = bufNeighbor[ nbr ];

        force = make_float3(0,0,0);
        for (int j=1; j < ncnt; j++) {                // base 1, n[0] = count
            ndistj = bufNdist[ nbr+j ];
            qpos = (float3*) (bufPntSort + __mul24( bufNeighbor[ nbr+j ], simData.stride) );
            c = ( simData.smooth_rad - ndistj );
            dist.x = ( pos->x - qpos->x )*d;          // dist in cm
            dist.y = ( pos->y - qpos->y )*d;
            dist.z = ( pos->z - qpos->z )*d;
            pterm = -0.5f * c * simData.spikykern * ( press + *(float*)((char*)qpos+OFFSET_PRESS) ) / ndistj;
            dterm = c * dens * *(float*)((char*)qpos+OFFSET_DENS);
            qeval = *(float3*)((char*)qpos+OFFSET_VEVAL);
            force.x += ( pterm * dist.x + vterm * ( qeval.x - veval.x )) * dterm;
            force.y += ( pterm * dist.y + vterm * ( qeval.y - veval.y )) * dterm;
            force.z += ( pterm * dist.z + vterm * ( qeval.z - veval.z )) * dterm;
        }
        *(float3*) ((char*)pos + OFFSET_FORCE ) = force;

    }
}
__global__ void advanceParticles ( char* bufPntSort, int numPnt, float dt, float ss )
{
    uint ndx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;        // particle index

    if ( ndx < numPnt ) {

        // Get particle vars
        float3* pos      = (float3*) (bufPntSort + __mul24(ndx, simData.stride));
        float3* vel      = (float3*) ((char*)pos + OFFSET_VEL );
        float3* vel_eval = (float3*) ((char*)pos + OFFSET_VEVAL );
        float3 accel = *(float3*) ((char*)pos + OFFSET_FORCE );
        float3 vcurr, vnext;

        // Leapfrog integration
        accel.x *= 0.00020543;        // NOTE - To do: SPH_PMASS should be passed in
        accel.y *= 0.00020543;
        accel.z *= 0.00020543;
        accel.z -= 9.8;               // gravity

        vcurr = *vel;
        vnext.x = accel.x*dt + vcurr.x;
        vnext.y = accel.y*dt + vcurr.y;
        vnext.z = accel.z*dt + vcurr.z;               // v(t+1/2) = v(t-1/2) + a(t) dt

        accel.x = (vcurr.x + vnext.x) * 0.5;          // v(t) = [v(t-1/2) + v(t+1/2)] * 0.5,
        accel.y = (vcurr.y + vnext.y) * 0.5;          // the velocity used when computing
        accel.z = (vcurr.z + vnext.z) * 0.5;          // forces on the next step

        *vel_eval = accel;
        *vel = vnext;

        dt /= simData.sim_scale;
        vnext.x = pos->x + vnext.x*dt;
        vnext.y = pos->y + vnext.y*dt;
        vnext.z = pos->z + vnext.z*dt;
        *pos = vnext;                                 // p(t+1) = p(t) + v(t+1/2) dt
    }

    __syncthreads ();
}

#endif
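Taken together, the kernels above form one simulation step: hash particles into cells, sort the (cell, particle) pairs, scatter particles into sorted order while recording cell starts, then evaluate pressure, forces and the leapfrog update. The sketch below is a hypothetical host-side ordering only; the buffer names, the grid reset via cudaMemset, and the use of the RadixSort routine from the radixsort files further down are assumptions for illustration, and the actual driver in fluid_system_host.cu is not part of this excerpt.

// Hypothetical ordering of one simulation step (illustrative names and sizes).
void fluidStepSketch ( char* d_pnts, char* d_pntSort, uint2* d_hash, uint2* d_hashTemp,
                       int* d_grid, int numPnt, int numGrid, float dt, float sim_scale )
{
    int threads = BLOCK_THREADS;
    int blocks  = (numPnt + threads - 1) / threads;

    // 1. cell hash per particle, then sort the (cell id, particle index) pairs by cell id
    hashParticles <<< blocks, threads >>> ( d_pnts, d_hash, numPnt );
    RadixSort ( (KeyValuePair*) d_hash, (KeyValuePair*) d_hashTemp, numPnt, 32 );

    // 2. reset the cell-start table to -1, scatter particles into sorted order
    cudaMemset ( d_grid, 0xFF, numGrid * sizeof(int) );
    insertParticlesRadix <<< blocks, threads >>> ( d_pnts, d_hash, d_grid, d_pntSort, numPnt, numGrid );

    // 3. SPH pressure, force, and leapfrog update
    computePressure  <<< blocks, threads >>> ( d_pntSort, d_grid, d_hash, numPnt );
    computeForce     <<< blocks, threads >>> ( d_pntSort, d_grid, d_hash, numPnt );
    advanceParticles <<< blocks, threads >>> ( d_pntSort, numPnt, dt, sim_scale );
}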
@@ -1,45 +1,45 @@
/*
  FLUIDS v.1 - SPH Fluid Simulator for CPU and GPU
  Copyright (C) 2009. Rama Hoetzlein, http://www.rchoetzlein.com

  ZLib license
  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/

#ifndef _PARTICLES_KERNEL_H_
#define _PARTICLES_KERNEL_H_

#include <stdio.h>
#include <math.h>
#include "cutil_math.h"
#include "math_constants.h"

// Insert particles in grid

__global__ void insertParticles ( char* pntData, uint pntStride )
{
    int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
    float4 p = *(float4*) (pntData + index*pntStride);

    // get address in grid
    int3 gridPos = calcGridPos(p);

    addParticleToCell(gridPos, index, gridCounters, gridCells);
}

#endif
@@ -1,79 +1,79 @@
/*
 * Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO USER:
 *
 * This source code is subject to NVIDIA ownership rights under U.S. and
 * international Copyright laws.
 *
 * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
 * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
 * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
 * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOURCE CODE.
 *
 * U.S. Government End Users. This source code is a "commercial item" as
 * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
 * "commercial computer software" and "commercial computer software
 * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
 * and is provided to the U.S. Government only as a commercial end item.
 * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
 * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
 * source code with only those rights set forth herein.
 */

/* Radixsort project with key/value and arbitrary dataset size support
 * which demonstrates the use of CUDA in a multi phase sorting
 * computation.
 * Host code.
 */

#include "radixsort.cuh"
#include "radixsort_kernel.cu"

extern "C"
{

////////////////////////////////////////////////////////////////////////////////
//! Perform a radix sort
//! Sorting performed in place on passed arrays.
//!
//! @param pData0       input and output array - data will be sorted
//! @param pData1       additional array to allow ping pong computation
//! @param elements     number of elements to sort
////////////////////////////////////////////////////////////////////////////////
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits)
{
    // Round element count to total number of threads for efficiency
    uint elements_rounded_to_3072;
    int modval = elements % 3072;
    if( modval == 0 )
        elements_rounded_to_3072 = elements;
    else
        elements_rounded_to_3072 = elements + (3072 - (modval));

    // Iterate over n bytes of y bit word, using each byte to sort the list in turn
    for (uint shift = 0; shift < bits; shift += RADIX)
    {
        // Perform one round of radix sorting

        // Generate per radix group sums radix counts across a radix group
        RadixSum<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, GRFSIZE>>>(pData0, elements, elements_rounded_to_3072, shift);
        // Prefix sum in radix groups, and then between groups throughout a block
        RadixPrefixSum<<<PREFIX_NUM_BLOCKS, PREFIX_NUM_THREADS_PER_BLOCK, PREFIX_GRFSIZE>>>();
        // Sum the block offsets and then shuffle data into bins
        RadixAddOffsetsAndShuffle<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, SHUFFLE_GRFSIZE>>>(pData0, pData1, elements, elements_rounded_to_3072, shift);

        // Exchange data pointers
        KeyValuePair* pTemp = pData0;
        pData0 = pData1;
        pData1 = pTemp;
    }
}

}
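RadixSort ping-pongs between the two buffers, one RADIX-bit digit per pass, so which buffer holds the final ordering depends on the number of passes (bits / RADIX). A minimal host-side usage sketch follows; the function and variable names are illustrative, and the assumption that the result lands back in the first buffer (an even number of passes, e.g. four passes for 32-bit keys with an 8-bit radix) should be checked against RADIX in radixsort_kernel.cu.

// Minimal sketch of sorting key/value pairs with the RadixSort routine above.
#include <cuda_runtime.h>
#include "radixsort.cuh"

void sortPairsSketch ( KeyValuePair* h_pairs, uint n )
{
    KeyValuePair *d_a = 0, *d_b = 0;
    size_t bytes = n * sizeof(KeyValuePair);

    cudaMalloc ( (void**) &d_a, bytes );
    cudaMalloc ( (void**) &d_b, bytes );
    cudaMemcpy ( d_a, h_pairs, bytes, cudaMemcpyHostToDevice );

    RadixSort ( d_a, d_b, n, 32 );                   // sort on the full 32-bit keys

    cudaMemcpy ( h_pairs, d_a, bytes, cudaMemcpyDeviceToHost );   // assumes an even pass count
    cudaFree ( d_a );
    cudaFree ( d_b );
}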
@@ -1,63 +1,63 @@
/*
 * Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO USER:
 *
 * This source code is subject to NVIDIA ownership rights under U.S. and
 * international Copyright laws.
 *
 * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
 * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
 * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
 * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOURCE CODE.
 *
 * U.S. Government End Users. This source code is a "commercial item" as
 * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
 * "commercial computer software" and "commercial computer software
 * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
 * and is provided to the U.S. Government only as a commercial end item.
 * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
 * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
 * source code with only those rights set forth herein.
 */

/* Radixsort project which demonstrates the use of CUDA in a multi phase
 * sorting computation.
 * Type definitions.
 */

#ifndef _RADIXSORT_H_
#define _RADIXSORT_H_

#include <host_defines.h>

#define SYNCIT __syncthreads()

// Set to 1 to use 16 bit keys/values instead of 32 bit
#define SIXTEEN 0

typedef unsigned int uint;
typedef unsigned short ushort;

#if SIXTEEN
typedef struct __align__(4) {
    ushort key;
    ushort value;
#else
typedef struct __align__(8) {
    uint key;
    uint value;
#endif
} KeyValuePair;

extern "C" {
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits);
}

#endif // #ifndef _RADIXSORT_H_
File diff suppressed because it is too large
Load Diff