Added btCudaBroadphase, some early research & development work to accelerate Bullet using CUDA
Re-uses the NVidia particle demo.
This commit is contained in:
7
Extras/CUDA/README.txt
Normal file
7
Extras/CUDA/README.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
btCudaBroadphase is some R&D work, a first attempt to use CUDA in Bullet.
|
||||
It uses the NVidia CUDA particle demo grid broadphase as Bullet broadphase.
|
||||
|
||||
Press 's' to toggle between the original CUDA particle demo and using Bullet+btCudaBroadphase.
|
||||
|
||||
See some related discussion here:
|
||||
http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=500&start=105
|
||||
1139
Extras/CUDA/btCudaBroadphase.cpp
Normal file
1139
Extras/CUDA/btCudaBroadphase.cpp
Normal file
File diff suppressed because it is too large
Load Diff
124
Extras/CUDA/btCudaBroadphase.h
Normal file
124
Extras/CUDA/btCudaBroadphase.h
Normal file
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef CUDA_BROADPHASE_H
|
||||
#define CUDA_BROADPHASE_H
|
||||
|
||||
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
|
||||
|
||||
///The btCudaBroadphase uses CUDA to compute overlapping pairs using a GPU.
|
||||
class btCudaBroadphase : public btSimpleBroadphase
|
||||
{
|
||||
|
||||
|
||||
|
||||
bool m_bInitialized;
|
||||
int m_numParticles;
|
||||
|
||||
// CPU data
|
||||
float* m_hPos;
|
||||
float* m_hVel;
|
||||
float* m_hSortedPos;
|
||||
|
||||
unsigned int* m_hGridCounters;
|
||||
unsigned int* m_hGridCells;
|
||||
|
||||
unsigned int* m_hParticleHash;
|
||||
unsigned int* m_hCellStart;
|
||||
|
||||
|
||||
// GPU data
|
||||
float* m_dPos[2];
|
||||
float* m_dVel[2];
|
||||
|
||||
float* m_dSortedPos;
|
||||
float* m_dSortedVel;
|
||||
|
||||
// uniform grid data
|
||||
unsigned int* m_dGridCounters; // counts number of entries per grid cell
|
||||
unsigned int* m_dGridCells; // contains indices of up to "m_maxParticlesPerCell" particles per cell
|
||||
|
||||
unsigned int* m_dParticleHash[2];
|
||||
unsigned int* m_dCellStart;
|
||||
|
||||
unsigned int m_posVbo[2];
|
||||
unsigned int m_colorVBO;
|
||||
|
||||
unsigned int m_currentPosRead, m_currentVelRead;
|
||||
unsigned int m_currentPosWrite, m_currentVelWrite;
|
||||
|
||||
// params
|
||||
struct SimParams& m_simParams;
|
||||
|
||||
|
||||
|
||||
unsigned int m_maxParticlesPerCell;
|
||||
|
||||
protected:
|
||||
|
||||
unsigned int createVBO(unsigned int size);
|
||||
|
||||
void _initialize(int numParticles);
|
||||
|
||||
void _finalize();
|
||||
|
||||
|
||||
|
||||
public:
|
||||
|
||||
enum ParticleArray
|
||||
{
|
||||
POSITION,
|
||||
VELOCITY,
|
||||
};
|
||||
|
||||
enum ParticleConfig
|
||||
{
|
||||
CONFIG_RANDOM,
|
||||
CONFIG_GRID,
|
||||
_NUM_CONFIGS
|
||||
};
|
||||
|
||||
btCudaBroadphase(SimParams& simParams,int maxProxies);
|
||||
|
||||
virtual ~btCudaBroadphase();
|
||||
|
||||
void initGrid(unsigned int* size, float spacing, float jitter, unsigned int numParticles);
|
||||
|
||||
void reset(ParticleConfig config);
|
||||
|
||||
void setArray(ParticleArray array, const float* data, int start, int count);
|
||||
|
||||
float* getArray(ParticleArray array);
|
||||
|
||||
void addSphere(int start, float *pos, float *vel, int r, float spacing);
|
||||
|
||||
virtual void calculateOverlappingPairs(btDispatcher* dispatcher);
|
||||
|
||||
unsigned int getCurrentReadBuffer() const { return m_posVbo[m_currentPosRead]; }
|
||||
unsigned int getColorBuffer() const { return m_colorVBO; }
|
||||
void dumpParticles(unsigned int start, unsigned int count);
|
||||
void dumpGrid();
|
||||
|
||||
float* copyBuffersFromDeviceToHost();
|
||||
void copyBuffersFromHostToDevice();
|
||||
float* getHvelPtr();
|
||||
float* getHposPtr();
|
||||
void quickHack(float deltaTime);
|
||||
void quickHack2();
|
||||
void integrate();
|
||||
|
||||
};
|
||||
#endif //CUDA_BROADPHASE_H
|
||||
113
Extras/CUDA/btCudaBroadphase.sln
Normal file
113
Extras/CUDA/btCudaBroadphase.sln
Normal file
@@ -0,0 +1,113 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 9.00
|
||||
# Visual Studio 2005
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "btCudaBroadphase", "btCudaBroadphase.vcproj", "{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} = {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319} = {61BD1097-CF2E-B296-DAA9-73A6FE135319}
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} = {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletmath", "..\..\msvc\8\libbulletmath.vcproj", "{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletdynamics", "..\..\msvc\8\libbulletdynamics.vcproj", "{61BD1097-CF2E-B296-DAA9-73A6FE135319}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletcollision", "..\..\msvc\8\libbulletcollision.vcproj", "{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Debug|x64 = Debug|x64
|
||||
DebugDoublePrecision|Win32 = DebugDoublePrecision|Win32
|
||||
DebugDoublePrecision|x64 = DebugDoublePrecision|x64
|
||||
EmuDebug|Win32 = EmuDebug|Win32
|
||||
EmuDebug|x64 = EmuDebug|x64
|
||||
EmuRelease|Win32 = EmuRelease|Win32
|
||||
EmuRelease|x64 = EmuRelease|x64
|
||||
Release|Win32 = Release|Win32
|
||||
Release|x64 = Release|x64
|
||||
ReleaseDoublePrecision|Win32 = ReleaseDoublePrecision|Win32
|
||||
ReleaseDoublePrecision|x64 = ReleaseDoublePrecision|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|x64.Build.0 = Debug|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|Win32.ActiveCfg = Debug|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|x64.ActiveCfg = Debug|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|x64.Build.0 = Debug|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|Win32.ActiveCfg = EmuDebug|Win32
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|Win32.Build.0 = EmuDebug|Win32
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|x64.ActiveCfg = EmuDebug|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|x64.Build.0 = EmuDebug|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|Win32.ActiveCfg = EmuRelease|Win32
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|Win32.Build.0 = EmuRelease|Win32
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|x64.ActiveCfg = EmuRelease|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|x64.Build.0 = EmuRelease|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|Win32.Build.0 = Release|Win32
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|x64.ActiveCfg = Release|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|x64.Build.0 = Release|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|Win32.ActiveCfg = Release|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|x64.ActiveCfg = Release|x64
|
||||
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|x64.Build.0 = Release|x64
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|x64.ActiveCfg = Debug|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|Win32.ActiveCfg = Debug|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|Win32.Build.0 = Debug|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|x64.ActiveCfg = Debug|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|Win32.ActiveCfg = Release|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|Win32.Build.0 = Release|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|x64.ActiveCfg = Release|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|Win32.Build.0 = Release|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|x64.ActiveCfg = Release|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32
|
||||
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|x64.ActiveCfg = Debug|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|Win32.ActiveCfg = Debug|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|Win32.Build.0 = Debug|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|x64.ActiveCfg = Debug|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|Win32.ActiveCfg = Release|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|Win32.Build.0 = Release|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|x64.ActiveCfg = Release|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|Win32.Build.0 = Release|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|x64.ActiveCfg = Release|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32
|
||||
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|x64.ActiveCfg = Debug|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|Win32.ActiveCfg = Debug|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|Win32.Build.0 = Debug|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|x64.ActiveCfg = Debug|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|Win32.ActiveCfg = Release|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|Win32.Build.0 = Release|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|x64.ActiveCfg = Release|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|Win32.Build.0 = Release|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|x64.ActiveCfg = Release|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32
|
||||
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
1029
Extras/CUDA/btCudaBroadphase.vcproj
Normal file
1029
Extras/CUDA/btCudaBroadphase.vcproj
Normal file
File diff suppressed because it is too large
Load Diff
86
Extras/CUDA/cutil_gl_error.h
Normal file
86
Extras/CUDA/cutil_gl_error.h
Normal file
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
#ifndef CUTIL_GL_ERROR
|
||||
#define CUTIL_GL_ERROR
|
||||
|
||||
/* CUda UTility Library */
|
||||
|
||||
// includes, system
|
||||
#ifdef _WIN32
|
||||
# define WINDOWS_LEAN_AND_MEAN
|
||||
# include <windows.h>
|
||||
# include <stdlib.h>
|
||||
# undef min
|
||||
# undef max
|
||||
#endif
|
||||
|
||||
// includes, graphics
|
||||
#if defined (__APPLE__) || defined(MACOSX)
|
||||
#include <OpenGL/gl.h>
|
||||
#include <OpenGL/glu.h>
|
||||
#else
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glu.h>
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Check for OpenGL error
|
||||
//! @return CUTTrue if no GL error has been encountered, otherwise 0
|
||||
//! @param file __FILE__ macro
|
||||
//! @param line __LINE__ macro
|
||||
//! @note The GL error is listed on stderr
|
||||
//! @note This function should be used via the CHECK_ERROR_GL() macro
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
CUTBoolean CUTIL_API
|
||||
cutCheckErrorGL( const char* file, const int line)
|
||||
{
|
||||
CUTBoolean ret_val = CUTTrue;
|
||||
|
||||
// check for error
|
||||
GLenum gl_error = glGetError();
|
||||
if (gl_error != GL_NO_ERROR)
|
||||
{
|
||||
fprintf(stderr, "GL Error in file '%s' in line %d :\n", file, line);
|
||||
fprintf(stderr, "%s\n", gluErrorString(gl_error));
|
||||
ret_val = CUTFalse;
|
||||
}
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
||||
#define CUT_CHECK_ERROR_GL() \
|
||||
if( CUTFalse == cutCheckErrorGL( __FILE__, __LINE__)) { \
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
#endif // _DEBUG
|
||||
|
||||
#endif // CUTIL_GL_ERROR
|
||||
767
Extras/CUDA/cutil_math.h
Normal file
767
Extras/CUDA/cutil_math.h
Normal file
@@ -0,0 +1,767 @@
|
||||
/*
|
||||
* Copyright 1993-2007 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws. Users and possessors of this source code
|
||||
* are hereby granted a nonexclusive, royalty-free license to use this code
|
||||
* in individual and commercial software.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*
|
||||
* Any use of this source code in individual and commercial software must
|
||||
* include, in the user documentation and internal comments to the code,
|
||||
* the above Disclaimer and U.S. Government End Users Notice.
|
||||
*/
|
||||
|
||||
/*
|
||||
This file implements common mathematical operations on vector types
|
||||
(float3, float4 etc.) since these are not provided as standard by CUDA.
|
||||
|
||||
The syntax is modelled on the Cg standard library.
|
||||
*/
|
||||
|
||||
#ifndef CUTIL_MATH_H
|
||||
#define CUTIL_MATH_H
|
||||
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned short ushort;
|
||||
|
||||
#ifndef __CUDACC__
|
||||
#include <math.h>
|
||||
|
||||
inline float fminf(float a, float b)
|
||||
{
|
||||
return a < b ? a : b;
|
||||
}
|
||||
|
||||
inline float fmaxf(float a, float b)
|
||||
{
|
||||
return a < b ? a : b;
|
||||
}
|
||||
|
||||
inline int max(int a, int b)
|
||||
{
|
||||
return a > b ? a : b;
|
||||
}
|
||||
|
||||
inline int min(int a, int b)
|
||||
{
|
||||
return a < b ? a : b;
|
||||
}
|
||||
#endif
|
||||
|
||||
// float functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// lerp
|
||||
inline __device__ __host__ float lerp(float a, float b, float t)
|
||||
{
|
||||
return a + t*(b-a);
|
||||
}
|
||||
|
||||
// clamp
|
||||
inline __device__ __host__ float clamp(float f, float a, float b)
|
||||
{
|
||||
return fmaxf(a, fminf(f, b));
|
||||
}
|
||||
|
||||
// int2 functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// negate
|
||||
inline __host__ __device__ int2 operator-(int2 &a)
|
||||
{
|
||||
return make_int2(-a.x, -a.y);
|
||||
}
|
||||
|
||||
// addition
|
||||
inline __host__ __device__ int2 operator+(int2 a, int2 b)
|
||||
{
|
||||
return make_int2(a.x + b.x, a.y + b.y);
|
||||
}
|
||||
inline __host__ __device__ void operator+=(int2 &a, int2 b)
|
||||
{
|
||||
a.x += b.x; a.y += b.y;
|
||||
}
|
||||
|
||||
// subtract
|
||||
inline __host__ __device__ int2 operator-(int2 a, int2 b)
|
||||
{
|
||||
return make_int2(a.x - b.x, a.y - b.y);
|
||||
}
|
||||
inline __host__ __device__ void operator-=(int2 &a, int2 b)
|
||||
{
|
||||
a.x -= b.x; a.y -= b.y;
|
||||
}
|
||||
|
||||
// multiply
|
||||
inline __host__ __device__ int2 operator*(int2 a, int2 b)
|
||||
{
|
||||
return make_int2(a.x * b.x, a.y * b.y);
|
||||
}
|
||||
inline __host__ __device__ int2 operator*(int2 a, int s)
|
||||
{
|
||||
return make_int2(a.x * s, a.y * s);
|
||||
}
|
||||
inline __host__ __device__ int2 operator*(int s, int2 a)
|
||||
{
|
||||
return make_int2(a.x * s, a.y * s);
|
||||
}
|
||||
inline __host__ __device__ void operator*=(int2 &a, int s)
|
||||
{
|
||||
a.x *= s; a.y *= s;
|
||||
}
|
||||
|
||||
// float2 functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// additional constructors
|
||||
inline __host__ __device__ float2 make_float2(float s)
|
||||
{
|
||||
return make_float2(s, s);
|
||||
}
|
||||
inline __host__ __device__ float2 make_float2(int2 a)
|
||||
{
|
||||
return make_float2(float(a.x), float(a.y));
|
||||
}
|
||||
|
||||
// negate
|
||||
inline __host__ __device__ float2 operator-(float2 &a)
|
||||
{
|
||||
return make_float2(-a.x, -a.y);
|
||||
}
|
||||
|
||||
// addition
|
||||
inline __host__ __device__ float2 operator+(float2 a, float2 b)
|
||||
{
|
||||
return make_float2(a.x + b.x, a.y + b.y);
|
||||
}
|
||||
inline __host__ __device__ void operator+=(float2 &a, float2 b)
|
||||
{
|
||||
a.x += b.x; a.y += b.y;
|
||||
}
|
||||
|
||||
// subtract
|
||||
inline __host__ __device__ float2 operator-(float2 a, float2 b)
|
||||
{
|
||||
return make_float2(a.x - b.x, a.y - b.y);
|
||||
}
|
||||
inline __host__ __device__ void operator-=(float2 &a, float2 b)
|
||||
{
|
||||
a.x -= b.x; a.y -= b.y;
|
||||
}
|
||||
|
||||
// multiply
|
||||
inline __host__ __device__ float2 operator*(float2 a, float2 b)
|
||||
{
|
||||
return make_float2(a.x * b.x, a.y * b.y);
|
||||
}
|
||||
inline __host__ __device__ float2 operator*(float2 a, float s)
|
||||
{
|
||||
return make_float2(a.x * s, a.y * s);
|
||||
}
|
||||
inline __host__ __device__ float2 operator*(float s, float2 a)
|
||||
{
|
||||
return make_float2(a.x * s, a.y * s);
|
||||
}
|
||||
inline __host__ __device__ void operator*=(float2 &a, float s)
|
||||
{
|
||||
a.x *= s; a.y *= s;
|
||||
}
|
||||
|
||||
// divide
|
||||
inline __host__ __device__ float2 operator/(float2 a, float2 b)
|
||||
{
|
||||
return make_float2(a.x / b.x, a.y / b.y);
|
||||
}
|
||||
inline __host__ __device__ float2 operator/(float2 a, float s)
|
||||
{
|
||||
float inv = 1.0f / s;
|
||||
return a * inv;
|
||||
}
|
||||
inline __host__ __device__ float2 operator/(float s, float2 a)
|
||||
{
|
||||
float inv = 1.0f / s;
|
||||
return a * inv;
|
||||
}
|
||||
inline __host__ __device__ void operator/=(float2 &a, float s)
|
||||
{
|
||||
float inv = 1.0f / s;
|
||||
a *= inv;
|
||||
}
|
||||
|
||||
// lerp
|
||||
inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
|
||||
{
|
||||
return a + t*(b-a);
|
||||
}
|
||||
|
||||
// clamp
|
||||
inline __device__ __host__ float2 clamp(float2 v, float a, float b)
|
||||
{
|
||||
return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
|
||||
}
|
||||
|
||||
inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
|
||||
{
|
||||
return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
|
||||
}
|
||||
|
||||
// dot product
|
||||
inline __host__ __device__ float dot(float2 a, float2 b)
|
||||
{
|
||||
return a.x * b.x + a.y * b.y;
|
||||
}
|
||||
|
||||
// length
|
||||
inline __host__ __device__ float length(float2 v)
|
||||
{
|
||||
return sqrtf(dot(v, v));
|
||||
}
|
||||
|
||||
// normalize
|
||||
inline __host__ __device__ float2 normalize(float2 v)
|
||||
{
|
||||
float invLen = 1.0f / sqrtf(dot(v, v));
|
||||
return v * invLen;
|
||||
}
|
||||
|
||||
// floor
|
||||
inline __host__ __device__ float2 floor(const float2 v)
|
||||
{
|
||||
return make_float2(floor(v.x), floor(v.y));
|
||||
}
|
||||
|
||||
// reflect
|
||||
inline __host__ __device__ float2 reflect(float2 i, float2 n)
|
||||
{
|
||||
return i - 2.0f * n * dot(n,i);
|
||||
}
|
||||
|
||||
// float3 functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// additional constructors
|
||||
inline __host__ __device__ float3 make_float3(float s)
|
||||
{
|
||||
return make_float3(s, s, s);
|
||||
}
|
||||
inline __host__ __device__ float3 make_float3(float2 a)
|
||||
{
|
||||
return make_float3(a.x, a.y, 0.0f);
|
||||
}
|
||||
inline __host__ __device__ float3 make_float3(float2 a, float s)
|
||||
{
|
||||
return make_float3(a.x, a.y, s);
|
||||
}
|
||||
inline __host__ __device__ float3 make_float3(float4 a)
|
||||
{
|
||||
return make_float3(a.x, a.y, a.z); // discards w
|
||||
}
|
||||
inline __host__ __device__ float3 make_float3(int3 a)
|
||||
{
|
||||
return make_float3(float(a.x), float(a.y), float(a.z));
|
||||
}
|
||||
|
||||
// negate
|
||||
inline __host__ __device__ float3 operator-(float3 &a)
|
||||
{
|
||||
return make_float3(-a.x, -a.y, -a.z);
|
||||
}
|
||||
|
||||
// min
|
||||
static __inline__ __host__ __device__ float3 fminf(float3 a, float3 b)
|
||||
{
|
||||
return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
|
||||
}
|
||||
|
||||
// max
|
||||
static __inline__ __host__ __device__ float3 fmaxf(float3 a, float3 b)
|
||||
{
|
||||
return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
|
||||
}
|
||||
|
||||
// addition
|
||||
inline __host__ __device__ float3 operator+(float3 a, float3 b)
|
||||
{
|
||||
return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
|
||||
}
|
||||
inline __host__ __device__ float3 operator+(float3 a, float b)
|
||||
{
|
||||
return make_float3(a.x + b, a.y + b, a.z + b);
|
||||
}
|
||||
inline __host__ __device__ void operator+=(float3 &a, float3 b)
|
||||
{
|
||||
a.x += b.x; a.y += b.y; a.z += b.z;
|
||||
}
|
||||
|
||||
// subtract
|
||||
inline __host__ __device__ float3 operator-(float3 a, float3 b)
|
||||
{
|
||||
return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
|
||||
}
|
||||
inline __host__ __device__ float3 operator-(float3 a, float b)
|
||||
{
|
||||
return make_float3(a.x - b, a.y - b, a.z - b);
|
||||
}
|
||||
inline __host__ __device__ void operator-=(float3 &a, float3 b)
|
||||
{
|
||||
a.x -= b.x; a.y -= b.y; a.z -= b.z;
|
||||
}
|
||||
|
||||
// multiply
|
||||
inline __host__ __device__ float3 operator*(float3 a, float3 b)
|
||||
{
|
||||
return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
|
||||
}
|
||||
inline __host__ __device__ float3 operator*(float3 a, float s)
|
||||
{
|
||||
return make_float3(a.x * s, a.y * s, a.z * s);
|
||||
}
|
||||
inline __host__ __device__ float3 operator*(float s, float3 a)
|
||||
{
|
||||
return make_float3(a.x * s, a.y * s, a.z * s);
|
||||
}
|
||||
inline __host__ __device__ void operator*=(float3 &a, float s)
|
||||
{
|
||||
a.x *= s; a.y *= s; a.z *= s;
|
||||
}
|
||||
|
||||
// divide
|
||||
inline __host__ __device__ float3 operator/(float3 a, float3 b)
|
||||
{
|
||||
return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
|
||||
}
|
||||
inline __host__ __device__ float3 operator/(float3 a, float s)
|
||||
{
|
||||
float inv = 1.0f / s;
|
||||
return a * inv;
|
||||
}
|
||||
inline __host__ __device__ float3 operator/(float s, float3 a)
|
||||
{
|
||||
float inv = 1.0f / s;
|
||||
return a * inv;
|
||||
}
|
||||
inline __host__ __device__ void operator/=(float3 &a, float s)
|
||||
{
|
||||
float inv = 1.0f / s;
|
||||
a *= inv;
|
||||
}
|
||||
|
||||
// lerp
|
||||
inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
|
||||
{
|
||||
return a + t*(b-a);
|
||||
}
|
||||
|
||||
// clamp
|
||||
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
|
||||
{
|
||||
return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
|
||||
}
|
||||
|
||||
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
|
||||
{
|
||||
return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
|
||||
}
|
||||
|
||||
// dot product
|
||||
inline __host__ __device__ float dot(float3 a, float3 b)
|
||||
{
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||
}
|
||||
|
||||
// cross product
|
||||
inline __host__ __device__ float3 cross(float3 a, float3 b)
|
||||
{
|
||||
return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
|
||||
}
|
||||
|
||||
// length
|
||||
inline __host__ __device__ float length(float3 v)
|
||||
{
|
||||
return sqrtf(dot(v, v));
|
||||
}
|
||||
|
||||
// normalize
|
||||
inline __host__ __device__ float3 normalize(float3 v)
|
||||
{
|
||||
float invLen = 1.0f / sqrtf(dot(v, v));
|
||||
return v * invLen;
|
||||
}
|
||||
|
||||
// floor
|
||||
inline __host__ __device__ float3 floor(const float3 v)
|
||||
{
|
||||
return make_float3(floor(v.x), floor(v.y), floor(v.z));
|
||||
}
|
||||
|
||||
// reflect
|
||||
inline __host__ __device__ float3 reflect(float3 i, float3 n)
|
||||
{
|
||||
return i - 2.0f * n * dot(n,i);
|
||||
}
|
||||
|
||||
// float4 functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// additional constructors
|
||||
inline __host__ __device__ float4 make_float4(float s)
|
||||
{
|
||||
return make_float4(s, s, s, s);
|
||||
}
|
||||
inline __host__ __device__ float4 make_float4(float3 a)
|
||||
{
|
||||
return make_float4(a.x, a.y, a.z, 0.0f);
|
||||
}
|
||||
inline __host__ __device__ float4 make_float4(float3 a, float w)
|
||||
{
|
||||
return make_float4(a.x, a.y, a.z, w);
|
||||
}
|
||||
inline __host__ __device__ float4 make_float4(int4 a)
|
||||
{
|
||||
return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
|
||||
}
|
||||
|
||||
// negate
|
||||
inline __host__ __device__ float4 operator-(float4 &a)
|
||||
{
|
||||
return make_float4(-a.x, -a.y, -a.z, -a.w);
|
||||
}
|
||||
|
||||
// min
|
||||
static __inline__ __host__ __device__ float4 fminf(float4 a, float4 b)
|
||||
{
|
||||
return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
|
||||
}
|
||||
|
||||
// max
|
||||
static __inline__ __host__ __device__ float4 fmaxf(float4 a, float4 b)
|
||||
{
|
||||
return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
|
||||
}
|
||||
|
||||
// addition
|
||||
inline __host__ __device__ float4 operator+(float4 a, float4 b)
|
||||
{
|
||||
return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
|
||||
}
|
||||
inline __host__ __device__ void operator+=(float4 &a, float4 b)
|
||||
{
|
||||
a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
|
||||
}
|
||||
|
||||
// subtract
|
||||
inline __host__ __device__ float4 operator-(float4 a, float4 b)
|
||||
{
|
||||
return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
|
||||
}
|
||||
inline __host__ __device__ void operator-=(float4 &a, float4 b)
|
||||
{
|
||||
a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
|
||||
}
|
||||
|
||||
// multiply
|
||||
inline __host__ __device__ float4 operator*(float4 a, float s)
|
||||
{
|
||||
return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
|
||||
}
|
||||
inline __host__ __device__ float4 operator*(float s, float4 a)
|
||||
{
|
||||
return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
|
||||
}
|
||||
inline __host__ __device__ void operator*=(float4 &a, float s)
|
||||
{
|
||||
a.x *= s; a.y *= s; a.z *= s; a.w *= s;
|
||||
}
|
||||
|
||||
// divide
|
||||
inline __host__ __device__ float4 operator/(float4 a, float4 b)
|
||||
{
|
||||
return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
|
||||
}
|
||||
inline __host__ __device__ float4 operator/(float4 a, float s)
|
||||
{
|
||||
float inv = 1.0f / s;
|
||||
return a * inv;
|
||||
}
|
||||
inline __host__ __device__ float4 operator/(float s, float4 a)
|
||||
{
|
||||
float inv = 1.0f / s;
|
||||
return a * inv;
|
||||
}
|
||||
inline __host__ __device__ void operator/=(float4 &a, float s)
|
||||
{
|
||||
float inv = 1.0f / s;
|
||||
a *= inv;
|
||||
}
|
||||
|
||||
// lerp
|
||||
inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
|
||||
{
|
||||
return a + t*(b-a);
|
||||
}
|
||||
|
||||
// clamp
|
||||
inline __device__ __host__ float4 clamp(float4 v, float a, float b)
|
||||
{
|
||||
return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
|
||||
}
|
||||
|
||||
inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
|
||||
{
|
||||
return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
|
||||
}
|
||||
|
||||
// dot product
|
||||
inline __host__ __device__ float dot(float4 a, float4 b)
|
||||
{
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
|
||||
}
|
||||
|
||||
// length
|
||||
inline __host__ __device__ float length(float4 r)
|
||||
{
|
||||
return sqrtf(dot(r, r));
|
||||
}
|
||||
|
||||
// normalize
|
||||
inline __host__ __device__ float4 normalize(float4 v)
|
||||
{
|
||||
float invLen = 1.0f / sqrtf(dot(v, v));
|
||||
return v * invLen;
|
||||
}
|
||||
|
||||
// floor
|
||||
inline __host__ __device__ float4 floor(const float4 v)
|
||||
{
|
||||
return make_float4(floor(v.x), floor(v.y), floor(v.z), floor(v.w));
|
||||
}
|
||||
|
||||
// int3 functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// additional constructors
|
||||
inline __host__ __device__ int3 make_int3(int s)
|
||||
{
|
||||
return make_int3(s, s, s);
|
||||
}
|
||||
inline __host__ __device__ int3 make_int3(float3 a)
|
||||
{
|
||||
return make_int3(int(a.x), int(a.y), int(a.z));
|
||||
}
|
||||
|
||||
// negate
|
||||
inline __host__ __device__ int3 operator-(int3 &a)
|
||||
{
|
||||
return make_int3(-a.x, -a.y, -a.z);
|
||||
}
|
||||
|
||||
// min
|
||||
inline __host__ __device__ int3 min(int3 a, int3 b)
|
||||
{
|
||||
return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
|
||||
}
|
||||
|
||||
// max
|
||||
inline __host__ __device__ int3 max(int3 a, int3 b)
|
||||
{
|
||||
return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
|
||||
}
|
||||
|
||||
// addition
|
||||
inline __host__ __device__ int3 operator+(int3 a, int3 b)
|
||||
{
|
||||
return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
|
||||
}
|
||||
inline __host__ __device__ void operator+=(int3 &a, int3 b)
|
||||
{
|
||||
a.x += b.x; a.y += b.y; a.z += b.z;
|
||||
}
|
||||
|
||||
// subtract
|
||||
inline __host__ __device__ int3 operator-(int3 a, int3 b)
|
||||
{
|
||||
return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
|
||||
}
|
||||
|
||||
inline __host__ __device__ void operator-=(int3 &a, int3 b)
|
||||
{
|
||||
a.x -= b.x; a.y -= b.y; a.z -= b.z;
|
||||
}
|
||||
|
||||
// multiply
|
||||
inline __host__ __device__ int3 operator*(int3 a, int3 b)
|
||||
{
|
||||
return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
|
||||
}
|
||||
inline __host__ __device__ int3 operator*(int3 a, int s)
|
||||
{
|
||||
return make_int3(a.x * s, a.y * s, a.z * s);
|
||||
}
|
||||
inline __host__ __device__ int3 operator*(int s, int3 a)
|
||||
{
|
||||
return make_int3(a.x * s, a.y * s, a.z * s);
|
||||
}
|
||||
inline __host__ __device__ void operator*=(int3 &a, int s)
|
||||
{
|
||||
a.x *= s; a.y *= s; a.z *= s;
|
||||
}
|
||||
|
||||
// divide
|
||||
inline __host__ __device__ int3 operator/(int3 a, int3 b)
|
||||
{
|
||||
return make_int3(a.x / b.x, a.y / b.y, a.z / b.z);
|
||||
}
|
||||
inline __host__ __device__ int3 operator/(int3 a, int s)
|
||||
{
|
||||
return make_int3(a.x / s, a.y / s, a.z / s);
|
||||
}
|
||||
inline __host__ __device__ int3 operator/(int s, int3 a)
|
||||
{
|
||||
return make_int3(a.x / s, a.y / s, a.z / s);
|
||||
}
|
||||
inline __host__ __device__ void operator/=(int3 &a, int s)
|
||||
{
|
||||
a.x /= s; a.y /= s; a.z /= s;
|
||||
}
|
||||
|
||||
// clamp
|
||||
inline __device__ __host__ int clamp(int f, int a, int b)
|
||||
{
|
||||
return max(a, min(f, b));
|
||||
}
|
||||
|
||||
inline __device__ __host__ int3 clamp(int3 v, int a, int b)
|
||||
{
|
||||
return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
|
||||
}
|
||||
|
||||
inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
|
||||
{
|
||||
return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
|
||||
}
|
||||
|
||||
|
||||
// uint3 functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// additional constructors
|
||||
inline __host__ __device__ uint3 make_uint3(uint s)
|
||||
{
|
||||
return make_uint3(s, s, s);
|
||||
}
|
||||
inline __host__ __device__ uint3 make_uint3(float3 a)
|
||||
{
|
||||
return make_uint3(uint(a.x), uint(a.y), uint(a.z));
|
||||
}
|
||||
|
||||
// min
|
||||
inline __host__ __device__ uint3 min(uint3 a, uint3 b)
|
||||
{
|
||||
return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
|
||||
}
|
||||
|
||||
// max
|
||||
inline __host__ __device__ uint3 max(uint3 a, uint3 b)
|
||||
{
|
||||
return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
|
||||
}
|
||||
|
||||
// addition
|
||||
inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
|
||||
{
|
||||
return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
|
||||
}
|
||||
inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
|
||||
{
|
||||
a.x += b.x; a.y += b.y; a.z += b.z;
|
||||
}
|
||||
|
||||
// subtract
|
||||
inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
|
||||
{
|
||||
return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
|
||||
}
|
||||
|
||||
inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
|
||||
{
|
||||
a.x -= b.x; a.y -= b.y; a.z -= b.z;
|
||||
}
|
||||
|
||||
// multiply
|
||||
inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
|
||||
{
|
||||
return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
|
||||
}
|
||||
inline __host__ __device__ uint3 operator*(uint3 a, uint s)
|
||||
{
|
||||
return make_uint3(a.x * s, a.y * s, a.z * s);
|
||||
}
|
||||
inline __host__ __device__ uint3 operator*(uint s, uint3 a)
|
||||
{
|
||||
return make_uint3(a.x * s, a.y * s, a.z * s);
|
||||
}
|
||||
inline __host__ __device__ void operator*=(uint3 &a, uint s)
|
||||
{
|
||||
a.x *= s; a.y *= s; a.z *= s;
|
||||
}
|
||||
|
||||
// divide
|
||||
inline __host__ __device__ uint3 operator/(uint3 a, uint3 b)
|
||||
{
|
||||
return make_uint3(a.x / b.x, a.y / b.y, a.z / b.z);
|
||||
}
|
||||
inline __host__ __device__ uint3 operator/(uint3 a, uint s)
|
||||
{
|
||||
return make_uint3(a.x / s, a.y / s, a.z / s);
|
||||
}
|
||||
inline __host__ __device__ uint3 operator/(uint s, uint3 a)
|
||||
{
|
||||
return make_uint3(a.x / s, a.y / s, a.z / s);
|
||||
}
|
||||
inline __host__ __device__ void operator/=(uint3 &a, uint s)
|
||||
{
|
||||
a.x /= s; a.y /= s; a.z /= s;
|
||||
}
|
||||
|
||||
// clamp
|
||||
inline __device__ __host__ uint clamp(uint f, uint a, uint b)
|
||||
{
|
||||
return max(a, min(f, b));
|
||||
}
|
||||
|
||||
inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
|
||||
{
|
||||
return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
|
||||
}
|
||||
|
||||
inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
|
||||
{
|
||||
return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
|
||||
}
|
||||
|
||||
#endif
|
||||
3
Extras/CUDA/param.cpp
Normal file
3
Extras/CUDA/param.cpp
Normal file
@@ -0,0 +1,3 @@
|
||||
#include <param.h>
|
||||
|
||||
const Param<int> dummy("error");
|
||||
226
Extras/CUDA/param.h
Normal file
226
Extras/CUDA/param.h
Normal file
@@ -0,0 +1,226 @@
|
||||
/*
|
||||
Simple parameter system
|
||||
sgreen@nvidia.com 4/2001
|
||||
*/
|
||||
|
||||
#ifndef PARAM_H
|
||||
#define PARAM_H
|
||||
|
||||
#ifdef _WIN32
|
||||
# pragma warning(disable:4786) // No stupid debug warnings
|
||||
#endif
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
|
||||
// base class for named parameter
|
||||
class ParamBase {
|
||||
public:
|
||||
ParamBase(char *name) { m_name = name; }
|
||||
|
||||
virtual ~ParamBase() { }
|
||||
|
||||
std::string *GetName() { return &m_name; }
|
||||
|
||||
virtual float GetFloatValue() = 0;
|
||||
virtual int GetIntValue() = 0;
|
||||
|
||||
virtual std::string GetValueString() = 0;
|
||||
|
||||
virtual void Reset() = 0;
|
||||
virtual void Increment() = 0;
|
||||
virtual void Decrement() = 0;
|
||||
|
||||
virtual float GetPercentage() = 0;
|
||||
virtual void SetPercentage(float p) = 0;
|
||||
|
||||
virtual void Write(std::ostream &stream) = 0;
|
||||
virtual void Read(std::istream &stream) = 0;
|
||||
|
||||
virtual bool IsList() = 0;
|
||||
|
||||
protected:
|
||||
std::string m_name;
|
||||
};
|
||||
|
||||
// derived class for single-valued parameter
|
||||
template<class T> class Param : public ParamBase {
|
||||
public:
|
||||
Param(char *name, T value = 0, T min = 0, T max = 10000, T step = 1, T* ptr = 0)
|
||||
: ParamBase(name)
|
||||
{
|
||||
if (ptr) {
|
||||
m_ptr = ptr;
|
||||
} else {
|
||||
m_ptr = &m_value;
|
||||
}
|
||||
*m_ptr = value;
|
||||
m_default = value;
|
||||
|
||||
m_min = min;
|
||||
m_max = max;
|
||||
m_step = step;
|
||||
}
|
||||
|
||||
virtual ~Param() { }
|
||||
|
||||
virtual float GetFloatValue() { return (float) *m_ptr; }
|
||||
virtual int GetIntValue() { return (int) *m_ptr; }
|
||||
|
||||
T GetValue() const { return *m_ptr; }
|
||||
T SetValue(const T value) { *m_ptr = value; }
|
||||
|
||||
// inherited functions
|
||||
std::string GetValueString()
|
||||
{
|
||||
std::ostringstream ost;
|
||||
ost<<std::setprecision(3)<<*m_ptr;
|
||||
return ost.str();
|
||||
}
|
||||
|
||||
float GetPercentage()
|
||||
{
|
||||
return (*m_ptr - m_min) / (float) (m_max - m_min);
|
||||
}
|
||||
|
||||
void SetPercentage(float p)
|
||||
{
|
||||
*m_ptr = (T)(m_min + p * (m_max - m_min));
|
||||
}
|
||||
|
||||
void Reset() { *m_ptr = m_default; }
|
||||
|
||||
void Increment()
|
||||
{
|
||||
*m_ptr += m_step;
|
||||
if (*m_ptr > m_max)
|
||||
*m_ptr = m_max;
|
||||
}
|
||||
|
||||
void Decrement()
|
||||
{
|
||||
*m_ptr -= m_step;
|
||||
if (*m_ptr < m_min)
|
||||
*m_ptr = m_min;
|
||||
}
|
||||
|
||||
void Write(std::ostream &stream) { stream << m_name << " " << *m_ptr << '\n'; }
|
||||
void Read(std::istream &stream) { stream >> m_name >> *m_ptr; }
|
||||
|
||||
bool IsList() { return false; }
|
||||
|
||||
private:
|
||||
T m_value;
|
||||
T *m_ptr; // pointer to value declared elsewhere
|
||||
T m_default, m_min, m_max;
|
||||
T m_step;
|
||||
};
|
||||
|
||||
|
||||
extern const Param<int> dummy;
|
||||
|
||||
// list of parameters
|
||||
class ParamList : public ParamBase {
|
||||
public:
|
||||
ParamList(char *name = "")
|
||||
: ParamBase(name)
|
||||
{
|
||||
active = true;
|
||||
}
|
||||
|
||||
virtual ~ParamList() { }
|
||||
|
||||
virtual float GetFloatValue() { return 0.0f; }
|
||||
virtual int GetIntValue() { return 0; }
|
||||
|
||||
void AddParam(ParamBase *param)
|
||||
{
|
||||
m_params.push_back(param);
|
||||
m_map[*param->GetName()] = param;
|
||||
m_current = m_params.begin();
|
||||
}
|
||||
|
||||
// look-up parameter based on name
|
||||
ParamBase *GetParam(char *name)
|
||||
{
|
||||
ParamBase *p = m_map[name];
|
||||
|
||||
if (p)
|
||||
return p;
|
||||
else
|
||||
return (ParamBase *) &dummy;
|
||||
}
|
||||
|
||||
ParamBase *GetParam(int i)
|
||||
{
|
||||
return m_params[i];
|
||||
}
|
||||
|
||||
ParamBase *GetCurrent()
|
||||
{
|
||||
return *m_current;
|
||||
}
|
||||
|
||||
int GetSize() { return (int)m_params.size(); }
|
||||
|
||||
// inherited functions
|
||||
std::string GetValueString()
|
||||
{
|
||||
// return m_name;
|
||||
return "list";
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
m_current = m_params.begin();
|
||||
}
|
||||
|
||||
void Increment()
|
||||
{
|
||||
m_current++;
|
||||
if (m_current == m_params.end())
|
||||
m_current = m_params.begin();
|
||||
}
|
||||
|
||||
void Decrement()
|
||||
{
|
||||
if (m_current == m_params.begin())
|
||||
m_current = m_params.end()-1;
|
||||
else
|
||||
m_current--;
|
||||
|
||||
}
|
||||
|
||||
float GetPercentage() { return 0.0f; }
|
||||
void SetPercentage(float /*p*/) { ; }
|
||||
|
||||
void Write(std::ostream &stream)
|
||||
{
|
||||
stream << m_name << '\n';
|
||||
for(std::vector<ParamBase *>::const_iterator p = m_params.begin(); p != m_params.end(); ++p) {
|
||||
(*p)->Write(stream);
|
||||
}
|
||||
}
|
||||
|
||||
void Read(std::istream &stream)
|
||||
{
|
||||
stream >> m_name;
|
||||
for(std::vector<ParamBase *>::const_iterator p = m_params.begin(); p != m_params.end(); ++p) {
|
||||
(*p)->Read(stream);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsList() { return true; }
|
||||
|
||||
protected:
|
||||
bool active;
|
||||
std::vector<ParamBase *> m_params;
|
||||
std::map<std::string, ParamBase *> m_map;
|
||||
std::vector<ParamBase *>::const_iterator m_current;
|
||||
};
|
||||
|
||||
#endif
|
||||
209
Extras/CUDA/paramgl.cpp
Normal file
209
Extras/CUDA/paramgl.cpp
Normal file
@@ -0,0 +1,209 @@
|
||||
/*
|
||||
ParamListGL
|
||||
- class derived from ParamList to do simple OpenGL rendering of a parameter list
|
||||
sgg 8/2001
|
||||
*/
|
||||
|
||||
#include <param.h>
|
||||
#include <paramgl.h>
|
||||
|
||||
void beginWinCoords(void)
|
||||
{
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glPushMatrix();
|
||||
glLoadIdentity();
|
||||
glTranslatef(0.0, glutGet(GLUT_WINDOW_HEIGHT) - 1, 0.0);
|
||||
glScalef(1.0, -1.0, 1.0);
|
||||
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glPushMatrix();
|
||||
glLoadIdentity();
|
||||
glOrtho(0, glutGet(GLUT_WINDOW_WIDTH), 0, glutGet(GLUT_WINDOW_HEIGHT), -1, 1);
|
||||
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
}
|
||||
|
||||
void endWinCoords(void)
|
||||
{
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glPopMatrix();
|
||||
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glPopMatrix();
|
||||
}
|
||||
|
||||
void glPrint(int x, int y, const char *s, void *font)
|
||||
{
|
||||
int i, len;
|
||||
|
||||
glRasterPos2f(x, y);
|
||||
len = (int) strlen(s);
|
||||
for (i = 0; i < len; i++) {
|
||||
glutBitmapCharacter(font, s[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void glPrintShadowed(int x, int y, const char *s, void *font, float *color)
|
||||
{
|
||||
glColor3f(0.0, 0.0, 0.0);
|
||||
glPrint(x-1, y-1, s, font);
|
||||
|
||||
glColor3fv((GLfloat *) color);
|
||||
glPrint(x, y, s, font);
|
||||
}
|
||||
|
||||
|
||||
ParamListGL::ParamListGL(char *name) : ParamList(name)
|
||||
{
|
||||
font = (void *) GLUT_BITMAP_9_BY_15;
|
||||
// font = (void *) GLUT_BITMAP_8_BY_13;
|
||||
|
||||
bar_x = 250;
|
||||
bar_w = 250;
|
||||
bar_h = 10;
|
||||
bar_offset = 5;
|
||||
text_x = 5;
|
||||
separation = 15;
|
||||
value_x = 200;
|
||||
font_h = 15;
|
||||
start_x = 0;
|
||||
start_y = 0;
|
||||
|
||||
text_col_selected[0] = 1.0;
|
||||
text_col_selected[1] = 1.0;
|
||||
text_col_selected[2] = 1.0;
|
||||
|
||||
text_col_unselected[0] = 0.75;
|
||||
text_col_unselected[1] = 0.75;
|
||||
text_col_unselected[2] = 0.75;
|
||||
|
||||
bar_col_outer[0] = 0.0;
|
||||
bar_col_outer[1] = 0.0;
|
||||
bar_col_outer[2] = 0.0;
|
||||
|
||||
bar_col_inner[0] = 0.0;
|
||||
bar_col_inner[1] = 0.0;
|
||||
bar_col_inner[2] = 0.0;
|
||||
|
||||
text_col_shadow[0] = 0.0;
|
||||
text_col_shadow[1] = 0.0;
|
||||
text_col_shadow[2] = 0.0;
|
||||
}
|
||||
|
||||
void
|
||||
ParamListGL::Render(int x, int y, bool shadow)
|
||||
{
|
||||
beginWinCoords();
|
||||
|
||||
start_x = x; start_y = y;
|
||||
|
||||
for(std::vector<ParamBase *>::const_iterator p = m_params.begin(); p != m_params.end(); ++p) {
|
||||
if ((*p)->IsList()) {
|
||||
ParamListGL *list = (ParamListGL *) (*p);
|
||||
list->Render(x+10, y);
|
||||
y += separation*list->GetSize();
|
||||
|
||||
} else {
|
||||
if (p == m_current)
|
||||
glColor3fv(text_col_selected);
|
||||
else
|
||||
glColor3fv(text_col_unselected);
|
||||
|
||||
if (shadow) {
|
||||
glPrintShadowed(x + text_x, y + font_h, (*p)->GetName()->c_str(), font, (p == m_current) ? text_col_selected : text_col_unselected);
|
||||
glPrintShadowed(x + value_x, y + font_h, (*p)->GetValueString().c_str(), font, (p == m_current) ? text_col_selected : text_col_unselected);
|
||||
}
|
||||
else {
|
||||
glPrint(x + text_x, y + font_h, (*p)->GetName()->c_str(), font);
|
||||
glPrint(x + value_x, y + font_h, (*p)->GetValueString().c_str(), font);
|
||||
}
|
||||
|
||||
// glColor3fv((GLfloat *) &bar_col_outer);
|
||||
glBegin(GL_LINE_LOOP);
|
||||
glVertex2f(x + bar_x, y + bar_offset);
|
||||
glVertex2f(x + bar_x + bar_w, y + bar_offset);
|
||||
glVertex2f(x + bar_x + bar_w, y + bar_offset + bar_h);
|
||||
glVertex2f(x + bar_x, y + bar_offset + bar_h);
|
||||
glEnd();
|
||||
|
||||
// glColor3fv((GLfloat *) &bar_col_inner);
|
||||
glRectf(x + bar_x, y + bar_offset + bar_h, x + bar_x + (bar_w*(*p)->GetPercentage()), y + bar_offset);
|
||||
|
||||
y += separation;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
endWinCoords();
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
ParamListGL::Mouse(int x, int y, int button, int state)
|
||||
{
|
||||
if ((y < start_y) || (y > (int)(start_y + (separation * m_params.size()) - 1)))
|
||||
return false;
|
||||
|
||||
int i = (y - start_y) / separation;
|
||||
|
||||
if ((button==GLUT_LEFT_BUTTON) && (state==GLUT_DOWN)) {
|
||||
#if defined(__GNUC__) && (__GNUC__ < 3)
|
||||
m_current = &m_params[i];
|
||||
#else
|
||||
// MJH: workaround since the version of vector::at used here is non-standard
|
||||
for (m_current = m_params.begin(); m_current != m_params.end() && i > 0; m_current++, i--);
|
||||
//m_current = (std::vector<ParamBase *>::const_iterator)&m_params.at(i);
|
||||
#endif
|
||||
|
||||
if ((x > bar_x) && (x < bar_x + bar_w)) {
|
||||
Motion(x, y);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ParamListGL::Motion(int x, int y)
|
||||
{
|
||||
if ((y < start_y) || (y > start_y + (separation * (int)m_params.size()) - 1) )
|
||||
return false;
|
||||
|
||||
if (x < bar_x) {
|
||||
(*m_current)->SetPercentage(0.0);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (x > bar_x + bar_w) {
|
||||
(*m_current)->SetPercentage(1.0);
|
||||
return true;
|
||||
}
|
||||
|
||||
(*m_current)->SetPercentage((x-bar_x) / (float) bar_w);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
ParamListGL::Special(int key, int /*x*/, int /*y*/)
|
||||
{
|
||||
switch(key) {
|
||||
case GLUT_KEY_DOWN:
|
||||
Increment();
|
||||
break;
|
||||
case GLUT_KEY_UP:
|
||||
Decrement();
|
||||
break;
|
||||
case GLUT_KEY_RIGHT:
|
||||
GetCurrent()->Increment();
|
||||
break;
|
||||
case GLUT_KEY_LEFT:
|
||||
GetCurrent()->Decrement();
|
||||
break;
|
||||
case GLUT_KEY_HOME:
|
||||
GetCurrent()->Reset();
|
||||
break;
|
||||
case GLUT_KEY_END:
|
||||
GetCurrent()->SetPercentage(1.0);
|
||||
break;
|
||||
}
|
||||
glutPostRedisplay();
|
||||
}
|
||||
54
Extras/CUDA/paramgl.h
Normal file
54
Extras/CUDA/paramgl.h
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
ParamListGL
|
||||
- class derived from ParamList to do simple OpenGL rendering of a parameter list
|
||||
sgg 8/2001
|
||||
*/
|
||||
|
||||
#ifndef PARAMGL_H
|
||||
#define PARAMGL_H
|
||||
|
||||
#if defined(__APPLE__) || defined(MACOSX)
|
||||
#include <GLUT/glut.h>
|
||||
#else
|
||||
#include <GL/glut.h>
|
||||
#endif
|
||||
|
||||
#include <param.h>
|
||||
|
||||
void beginWinCoords();
|
||||
void endWinCoords();
|
||||
void glPrint(int x, int y, const char *s, void *font);
|
||||
void glPrintShadowed(int x, int y, const char *s, void *font, float *color);
|
||||
|
||||
class ParamListGL : public ParamList {
|
||||
public:
|
||||
ParamListGL(char *name = "");
|
||||
|
||||
void Render(int x, int y, bool shadow = false);
|
||||
bool Mouse(int x, int y, int button=GLUT_LEFT_BUTTON, int state=GLUT_DOWN);
|
||||
bool Motion(int x, int y);
|
||||
void Special(int key, int x, int y);
|
||||
|
||||
void SetSelectedColor(float r, float g, float b) { text_col_selected[0] = r; text_col_selected[1] = g; text_col_selected[2] = b; }
|
||||
void SetUnSelectedColor(float r, float g, float b) { text_col_unselected[0] = r; text_col_unselected[1] = g; text_col_unselected[2] = b; }
|
||||
|
||||
int bar_x;
|
||||
int bar_w;
|
||||
int bar_h;
|
||||
int text_x;
|
||||
int separation;
|
||||
int value_x;
|
||||
int font_h;
|
||||
int start_x, start_y;
|
||||
int bar_offset;
|
||||
|
||||
float text_col_selected[3];
|
||||
float text_col_unselected[3];
|
||||
float text_col_shadow[3];
|
||||
float bar_col_outer[3];
|
||||
float bar_col_inner[3];
|
||||
|
||||
void *font;
|
||||
};
|
||||
|
||||
#endif
|
||||
280
Extras/CUDA/particleSystem.cpp
Normal file
280
Extras/CUDA/particleSystem.cpp
Normal file
@@ -0,0 +1,280 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
#include "particleSystem.h"
|
||||
#include "particleSystem.cuh"
|
||||
#include "radixsort.cuh"
|
||||
#include "particles_kernel.cuh"
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <memory.h>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <algorithm>
|
||||
#include <GL/glew.h>
|
||||
|
||||
#include <btBulletDynamicsCommon.h>
|
||||
|
||||
#include "btCudaBroadphase.h"
|
||||
|
||||
|
||||
#ifndef CUDART_PI_F
|
||||
#define CUDART_PI_F 3.141592654f
|
||||
#endif
|
||||
|
||||
|
||||
ParticleSystem::ParticleSystem(uint numParticles, uint3 gridSize) :
|
||||
m_simulationMode(SIMULATION_BULLET_CPU)//SIMULATION_CUDA)
|
||||
{
|
||||
this->m_params.numBodies = numParticles;
|
||||
this->m_params.m_gridSize = gridSize;
|
||||
initializeBullet();
|
||||
}
|
||||
|
||||
ParticleSystem::~ParticleSystem()
|
||||
{
|
||||
finalizeBullet();
|
||||
}
|
||||
#include "../../Demos/OpenGL/GLDebugDrawer.h"
|
||||
|
||||
GLDebugDrawer debugDrawer;
|
||||
|
||||
void ParticleSystem::initializeBullet()
|
||||
{
|
||||
|
||||
m_collisionConfiguration = new btDefaultCollisionConfiguration();
|
||||
m_dispatcher = new btCollisionDispatcher(m_collisionConfiguration);
|
||||
//m_broadphase = new btDbvtBroadphase();
|
||||
//m_broadphase = new btAxisSweep3(btVector3(-3,-3,-3),btVector3(3,3,3));
|
||||
m_broadphase = new btCudaBroadphase(m_params,m_params.numBodies+6);
|
||||
|
||||
|
||||
|
||||
m_constraintSolver=new btSequentialImpulseConstraintSolver();
|
||||
m_dynamicsWorld = new btDiscreteDynamicsWorld(m_dispatcher,m_broadphase,m_constraintSolver,m_collisionConfiguration);
|
||||
m_dynamicsWorld->setDebugDrawer(&debugDrawer);
|
||||
//debugDrawer.setDebugMode(btIDebugDraw::DBG_DrawPairs);
|
||||
|
||||
|
||||
m_dynamicsWorld->setGravity(100*btVector3(m_params.gravity.x,m_params.gravity.y,m_params.gravity.z));
|
||||
m_dynamicsWorld->getSolverInfo().m_numIterations=1;
|
||||
|
||||
btBoxShape* worldBox = new btBoxShape(btVector3(m_params.worldSize.x/2,m_params.worldSize.y/2,m_params.worldSize.z/2));
|
||||
worldBox->setMargin(0.f);
|
||||
|
||||
//create 6 static planes for the world cube
|
||||
btStaticPlaneShape* planeShape;
|
||||
btRigidBody* body;
|
||||
btVector3 worldSize();
|
||||
|
||||
int i;
|
||||
|
||||
|
||||
|
||||
btSphereShape* particleSphere = new btSphereShape(m_params.particleRadius);
|
||||
particleSphere->setMargin(0.0);
|
||||
btVector3 localInertia;
|
||||
particleSphere->calculateLocalInertia(1,localInertia);
|
||||
|
||||
float* m_hPos = m_broadphase->getHposPtr();
|
||||
|
||||
for (i=0;i<m_params.numBodies;i++)
|
||||
{
|
||||
btRigidBody::btRigidBodyConstructionInfo rbci(1.,0,particleSphere,localInertia);
|
||||
rbci.m_startWorldTransform.setOrigin(btVector3(m_hPos[i*4],m_hPos[i*4+1],m_hPos[i*4+2]));
|
||||
body = new btRigidBody(rbci);
|
||||
body->setActivationState(DISABLE_DEACTIVATION);
|
||||
m_bulletParticles.push_back(body);
|
||||
m_dynamicsWorld->addRigidBody(body);
|
||||
}
|
||||
|
||||
reset(CONFIG_GRID);
|
||||
|
||||
/* for (i=0;i<6;i++)
|
||||
{
|
||||
btVector4 planeEq;
|
||||
worldBox->getPlaneEquation(planeEq,i);
|
||||
|
||||
planeShape = new btStaticPlaneShape(-planeEq,planeEq.getW());
|
||||
planeShape->setMargin(0.f);
|
||||
btRigidBody::btRigidBodyConstructionInfo rbci(0.f,0,planeShape);
|
||||
body = new btRigidBody(rbci);
|
||||
m_dynamicsWorld->addRigidBody(body);
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
void ParticleSystem::finalizeBullet()
|
||||
{
|
||||
delete m_dynamicsWorld;
|
||||
delete m_constraintSolver;
|
||||
delete m_broadphase;
|
||||
delete m_dispatcher ;
|
||||
delete m_collisionConfiguration;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
ParticleSystem::update(float deltaTime)
|
||||
{
|
||||
assert(m_bInitialized);
|
||||
|
||||
switch (m_simulationMode)
|
||||
{
|
||||
case SIMULATION_CUDA:
|
||||
{
|
||||
m_broadphase->quickHack(deltaTime);
|
||||
//todo
|
||||
break;
|
||||
}
|
||||
case SIMULATION_BULLET_CPU:
|
||||
{
|
||||
m_broadphase->integrate();
|
||||
|
||||
|
||||
///copy particles from device to main memory
|
||||
{
|
||||
float* hPosData = m_broadphase->copyBuffersFromDeviceToHost();
|
||||
float* m_hVel = m_broadphase->getHvelPtr();
|
||||
m_broadphase->copyBuffersFromHostToDevice();
|
||||
|
||||
|
||||
//sync transform and velocity from particle system to Bullet
|
||||
|
||||
for (int i=0;i<m_params.numBodies;i++)
|
||||
{
|
||||
btTransform& trans = m_bulletParticles[i]->getWorldTransform();
|
||||
trans.setOrigin(btVector3(hPosData[i*4],hPosData[i*4+1],hPosData[i*4+2]));
|
||||
m_bulletParticles[i]->setLinearVelocity(btVector3(m_hVel[i*4],m_hVel[i*4+1],m_hVel[i*4+2])*10.);
|
||||
}
|
||||
}
|
||||
|
||||
m_dynamicsWorld->stepSimulation(deltaTime);
|
||||
|
||||
/* for (int i=0;i<m_numParticles;i++)
|
||||
{
|
||||
data[i*4+1] -= 0.001f;
|
||||
m_hVel[i*4]=0;
|
||||
m_hVel[i*4+1]=0;
|
||||
m_hVel[i*4+2]=0;
|
||||
}
|
||||
*/
|
||||
|
||||
{
|
||||
float* hPosData = m_broadphase->copyBuffersFromDeviceToHost();
|
||||
float* m_hVel = m_broadphase->getHvelPtr();
|
||||
|
||||
//sync transform and velocity from Bullet to particle system
|
||||
for (int i=0;i<m_params.numBodies;i++)
|
||||
{
|
||||
btTransform& trans = m_bulletParticles[i]->getWorldTransform();
|
||||
hPosData[i*4] = trans.getOrigin().getX();
|
||||
hPosData[i*4+1] = trans.getOrigin().getY();
|
||||
hPosData[i*4+2] = trans.getOrigin().getZ();
|
||||
|
||||
m_hVel[i*4] = m_bulletParticles[i]->getLinearVelocity().getX()/10.;
|
||||
m_hVel[i*4+1] = m_bulletParticles[i]->getLinearVelocity().getY()/10.;
|
||||
m_hVel[i*4+2] = m_bulletParticles[i]->getLinearVelocity().getZ()/10.;
|
||||
}
|
||||
|
||||
m_broadphase->copyBuffersFromHostToDevice();
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
default:
|
||||
{
|
||||
printf("unknown simulation method\n");
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
float* ParticleSystem::getArray(ParticleArray array)
|
||||
{
|
||||
return m_broadphase->getArray((btCudaBroadphase::ParticleArray)array);
|
||||
|
||||
}
|
||||
void ParticleSystem::debugDraw()
|
||||
{
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
m_dynamicsWorld->debugDrawWorld();
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
}
|
||||
|
||||
|
||||
void ParticleSystem::reset(ParticleConfig config)
|
||||
{
|
||||
m_broadphase->reset((btCudaBroadphase::ParticleConfig)config);
|
||||
for (int i=0;i<m_bulletParticles.size();i++)
|
||||
{
|
||||
m_bulletParticles[i]->setAngularVelocity(btVector3(0,0,0));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void ParticleSystem::addSphere(int start, float *pos, float *vel, int r, float spacing)
|
||||
{
|
||||
m_broadphase->addSphere(start,pos,vel,r,spacing);
|
||||
}
|
||||
|
||||
unsigned int ParticleSystem::getCurrentReadBuffer() const
|
||||
{
|
||||
return m_broadphase->getCurrentReadBuffer();
|
||||
}
|
||||
unsigned int ParticleSystem::getColorBuffer() const
|
||||
{
|
||||
return m_broadphase->getColorBuffer();
|
||||
}
|
||||
|
||||
void ParticleSystem::dumpGrid()
|
||||
{
|
||||
return m_broadphase->dumpGrid();
|
||||
}
|
||||
|
||||
void ParticleSystem::dumpParticles(uint start, uint count)
|
||||
{
|
||||
m_broadphase->dumpParticles(start,count);
|
||||
}
|
||||
|
||||
int ParticleSystem::getNumParticles() const
|
||||
{
|
||||
return m_params.numBodies;
|
||||
}
|
||||
331
Extras/CUDA/particleSystem.cu
Normal file
331
Extras/CUDA/particleSystem.cu
Normal file
@@ -0,0 +1,331 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
//#include <cutil.h>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(__APPLE__) || defined(MACOSX)
|
||||
#include <GLUT/glut.h>
|
||||
#else
|
||||
#include <GL/glut.h>
|
||||
#endif
|
||||
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
#include "particles_kernel.cu"
|
||||
#include "radixsort.cu"
|
||||
|
||||
//! Check for CUDA error
|
||||
# define CUT_CHECK_ERROR(errorMessage) do { \
|
||||
cudaError_t err = cudaGetLastError(); \
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
|
||||
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
err = cudaThreadSynchronize(); \
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
|
||||
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
|
||||
exit(EXIT_FAILURE); \
|
||||
} } while (0)
|
||||
|
||||
|
||||
# define MY_CUDA_SAFE_CALL_NO_SYNC( call) do { \
|
||||
cudaError err = call; \
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
|
||||
__FILE__, __LINE__, cudaGetErrorString( err) ); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} } while (0)
|
||||
|
||||
# define MY_CUDA_SAFE_CALL( call) do { \
|
||||
MY_CUDA_SAFE_CALL_NO_SYNC(call); \
|
||||
cudaError err = cudaThreadSynchronize(); \
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
|
||||
__FILE__, __LINE__, cudaGetErrorString( err) ); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} } while (0)
|
||||
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
void cudaInit(int argc, char **argv)
|
||||
{
|
||||
//CUT_DEVICE_INIT(argc, argv);
|
||||
}
|
||||
|
||||
void allocateArray(void **devPtr, size_t size)
|
||||
{
|
||||
MY_CUDA_SAFE_CALL(cudaMalloc(devPtr, size));
|
||||
}
|
||||
|
||||
void freeArray(void *devPtr)
|
||||
{
|
||||
MY_CUDA_SAFE_CALL(cudaFree(devPtr));
|
||||
}
|
||||
|
||||
void threadSync()
|
||||
{
|
||||
MY_CUDA_SAFE_CALL(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int size)
|
||||
{
|
||||
if (vbo)
|
||||
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&device, vbo));
|
||||
MY_CUDA_SAFE_CALL(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost));
|
||||
if (vbo)
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vbo));
|
||||
}
|
||||
|
||||
void copyArrayToDevice(void* device, const void* host, int offset, int size)
|
||||
{
|
||||
MY_CUDA_SAFE_CALL(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
void registerGLBufferObject(uint vbo)
|
||||
{
|
||||
MY_CUDA_SAFE_CALL(cudaGLRegisterBufferObject(vbo));
|
||||
}
|
||||
|
||||
void unregisterGLBufferObject(uint vbo)
|
||||
{
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(vbo));
|
||||
}
|
||||
|
||||
void setParameters(SimParams *hostParams)
|
||||
{
|
||||
// copy parameters to constant memory
|
||||
MY_CUDA_SAFE_CALL( cudaMemcpyToSymbol(params, hostParams, sizeof(SimParams)) );
|
||||
}
|
||||
|
||||
//Round a / b to nearest higher integer value
|
||||
int iDivUp(int a, int b){
|
||||
return (a % b != 0) ? (a / b + 1) : (a / b);
|
||||
}
|
||||
|
||||
// compute grid and thread block size for a given number of elements
|
||||
void computeGridSize(int n, int blockSize, int &numBlocks, int &numThreads)
|
||||
{
|
||||
numThreads = min(blockSize, n);
|
||||
numBlocks = iDivUp(n, numThreads);
|
||||
}
|
||||
|
||||
void
|
||||
integrateSystem(uint vboOldPos, uint vboNewPos,
|
||||
float* oldVel, float* newVel,
|
||||
float deltaTime,
|
||||
int numBodies)
|
||||
{
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numBodies, 256, numBlocks, numThreads);
|
||||
|
||||
float *oldPos, *newPos;
|
||||
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos));
|
||||
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&newPos, vboNewPos));
|
||||
|
||||
// execute the kernel
|
||||
integrate<<< numBlocks, numThreads >>>((float4*)newPos, (float4*)newVel,
|
||||
(float4*)oldPos, (float4*)oldVel,
|
||||
deltaTime);
|
||||
|
||||
// check if kernel invocation generated an error
|
||||
CUT_CHECK_ERROR("integrate kernel execution failed");
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboNewPos));
|
||||
}
|
||||
|
||||
void
|
||||
updateGrid(uint vboPos,
|
||||
uint* gridCounters,
|
||||
uint* gridCells,
|
||||
uint numBodies,
|
||||
uint numCells)
|
||||
{
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numBodies, 256, numBlocks, numThreads);
|
||||
|
||||
float *pos;
|
||||
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&pos, vboPos));
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaMemset(gridCounters, 0, numCells*sizeof(uint)));
|
||||
|
||||
// execute the kernel
|
||||
updateGridD<<< numBlocks, numThreads >>>((float4 *) pos,
|
||||
gridCounters,
|
||||
gridCells);
|
||||
|
||||
// check if kernel invocation generated an error
|
||||
CUT_CHECK_ERROR("Kernel execution failed");
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos));
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
calcHash(uint vboPos,
|
||||
uint* particleHash,
|
||||
int numBodies)
|
||||
{
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numBodies, 256, numBlocks, numThreads);
|
||||
|
||||
float *pos;
|
||||
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&pos, vboPos));
|
||||
|
||||
// execute the kernel
|
||||
calcHashD<<< numBlocks, numThreads >>>((float4 *) pos,
|
||||
(uint2 *) particleHash);
|
||||
|
||||
// check if kernel invocation generated an error
|
||||
CUT_CHECK_ERROR("Kernel execution failed");
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos));
|
||||
}
|
||||
|
||||
void
|
||||
reorderDataAndFindCellStart(uint* particleHash,
|
||||
uint vboOldPos,
|
||||
float* oldVel,
|
||||
float* sortedPos,
|
||||
float* sortedVel,
|
||||
uint* cellStart,
|
||||
uint numBodies,
|
||||
uint numCells)
|
||||
{
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numBodies, 256, numBlocks, numThreads);
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint)));
|
||||
|
||||
float *oldPos;
|
||||
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos));
|
||||
|
||||
#if USE_TEX
|
||||
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, oldPos, numBodies*sizeof(float4)));
|
||||
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, oldVel, numBodies*sizeof(float4)));
|
||||
#endif
|
||||
|
||||
reorderDataAndFindCellStartD<<< numBlocks, numThreads >>>(
|
||||
(uint2 *) particleHash,
|
||||
(float4 *) oldPos,
|
||||
(float4 *) oldVel,
|
||||
(float4 *) sortedPos,
|
||||
(float4 *) sortedVel,
|
||||
(uint *) cellStart);
|
||||
CUT_CHECK_ERROR("Kernel execution failed: reorderDataAndFindCellStartD");
|
||||
|
||||
#if USE_TEX
|
||||
MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldPosTex));
|
||||
MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldVelTex));
|
||||
#endif
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
|
||||
}
|
||||
|
||||
void
|
||||
collide(uint vboOldPos, uint vboNewPos,
|
||||
float* sortedPos, float* sortedVel,
|
||||
float* oldVel, float* newVel,
|
||||
uint* gridCounters,
|
||||
uint* gridCells,
|
||||
uint* particleHash,
|
||||
uint* cellStart,
|
||||
uint numBodies,
|
||||
uint numCells,
|
||||
uint maxParticlesPerCell)
|
||||
{
|
||||
float4 *oldPos, *newPos;
|
||||
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos));
|
||||
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&newPos, vboNewPos));
|
||||
|
||||
#if USE_TEX
|
||||
|
||||
#if USE_SORT
|
||||
// use sorted arrays
|
||||
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, sortedPos, numBodies*sizeof(float4)));
|
||||
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, sortedVel, numBodies*sizeof(float4)));
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaBindTexture(0, particleHashTex, particleHash, numBodies*sizeof(uint2)));
|
||||
MY_CUDA_SAFE_CALL(cudaBindTexture(0, cellStartTex, cellStart, numCells*sizeof(uint)));
|
||||
#else
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, oldPos, numBodies*sizeof(float4)));
|
||||
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, oldVel, numBodies*sizeof(float4)));
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaBindTexture(0, gridCountersTex, gridCounters,numCells*sizeof(uint)));
|
||||
MY_CUDA_SAFE_CALL(cudaBindTexture(0, gridCellsTex, gridCells, numCells*maxParticlesPerCell*sizeof(uint)));
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// thread per particle
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numBodies, BLOCKDIM, numBlocks, numThreads);
|
||||
|
||||
// execute the kernel
|
||||
collideD<<< numBlocks, numThreads >>>((float4*)newPos, (float4*)newVel,
|
||||
#if USE_SORT
|
||||
(float4*)sortedPos, (float4*)sortedVel,
|
||||
(uint2 *) particleHash,
|
||||
cellStart
|
||||
#else
|
||||
(float4*)oldPos, (float4*)oldVel,
|
||||
gridCounters,
|
||||
gridCells
|
||||
#endif
|
||||
);
|
||||
|
||||
// check if kernel invocation generated an error
|
||||
CUT_CHECK_ERROR("Kernel execution failed");
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboNewPos));
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
|
||||
|
||||
#if USE_TEX
|
||||
MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldPosTex));
|
||||
MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldVelTex));
|
||||
|
||||
#if USE_SORT
|
||||
MY_CUDA_SAFE_CALL(cudaUnbindTexture(particleHashTex));
|
||||
MY_CUDA_SAFE_CALL(cudaUnbindTexture(cellStartTex));
|
||||
#else
|
||||
MY_CUDA_SAFE_CALL(cudaUnbindTexture(gridCountersTex));
|
||||
MY_CUDA_SAFE_CALL(cudaUnbindTexture(gridCellsTex));
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
57
Extras/CUDA/particleSystem.cuh
Normal file
57
Extras/CUDA/particleSystem.cuh
Normal file
@@ -0,0 +1,57 @@
|
||||
extern "C"
|
||||
{
|
||||
void cudaInit(int argc, char **argv);
|
||||
|
||||
void allocateArray(void **devPtr, int size);
|
||||
void freeArray(void *devPtr);
|
||||
|
||||
void threadSync();
|
||||
|
||||
void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int size);
|
||||
void copyArrayToDevice(void* device, const void* host, int offset, int size);
|
||||
void registerGLBufferObject(unsigned int vbo);
|
||||
void unregisterGLBufferObject(unsigned int vbo);
|
||||
|
||||
void setParameters(SimParams *hostParams);
|
||||
|
||||
void
|
||||
integrateSystem(uint vboOldPos, uint vboNewPos,
|
||||
float* oldVel, float* newVel,
|
||||
float deltaTime,
|
||||
int numBodies);
|
||||
|
||||
void
|
||||
updateGrid(uint vboPos,
|
||||
uint* gridCounters,
|
||||
uint* gridCells,
|
||||
uint numBodies,
|
||||
uint numCells);
|
||||
|
||||
void
|
||||
calcHash(uint vboPos,
|
||||
uint* particleHash,
|
||||
int numBodies);
|
||||
|
||||
void
|
||||
reorderDataAndFindCellStart(uint* particleHash,
|
||||
uint vboOldPos,
|
||||
float* oldVel,
|
||||
float* sortedPos,
|
||||
float* sortedVel,
|
||||
uint* cellStart,
|
||||
uint numBodies,
|
||||
uint numCells);
|
||||
|
||||
void
|
||||
collide(uint vboOldPos, uint vboNewPos,
|
||||
float* sortedPos, float* sortedVel,
|
||||
float* oldVel, float* newVel,
|
||||
uint* gridCounters,
|
||||
uint* gridCells,
|
||||
uint* particleHash,
|
||||
uint* cellStart,
|
||||
uint numBodies,
|
||||
uint numCells,
|
||||
uint maxParticlesPerCell);
|
||||
|
||||
}
|
||||
153
Extras/CUDA/particleSystem.h
Normal file
153
Extras/CUDA/particleSystem.h
Normal file
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
#ifndef __BODYSYSTEMCUDA_H__
|
||||
#define __BODYSYSTEMCUDA_H__
|
||||
|
||||
#define DEBUG_GRID 0
|
||||
#define DO_TIMING 0
|
||||
|
||||
#include "particles_kernel.cuh"
|
||||
#include "vector_functions.h"
|
||||
#include "LinearMath/btAlignedObjectArray.h"
|
||||
|
||||
// CUDA BodySystem: runs on the GPU
|
||||
class ParticleSystem
|
||||
{
|
||||
public:
|
||||
ParticleSystem(uint numParticles, uint3 gridSize);
|
||||
~ParticleSystem();
|
||||
|
||||
|
||||
enum ParticleArray
|
||||
{
|
||||
POSITION,
|
||||
VELOCITY,
|
||||
};
|
||||
|
||||
enum ParticleConfig
|
||||
{
|
||||
CONFIG_RANDOM,
|
||||
CONFIG_GRID,
|
||||
_NUM_CONFIGS
|
||||
};
|
||||
|
||||
enum SimulationMode
|
||||
{
|
||||
SIMULATION_CUDA,
|
||||
SIMULATION_BULLET_CPU,
|
||||
SIMULATION_NUM_MODES
|
||||
};
|
||||
|
||||
void reset(ParticleConfig config);
|
||||
void debugDraw();
|
||||
|
||||
|
||||
///
|
||||
///Bullet data
|
||||
///
|
||||
|
||||
void initializeBullet();
|
||||
void finalizeBullet();
|
||||
class btDiscreteDynamicsWorld* m_dynamicsWorld;
|
||||
class btDefaultCollisionConfiguration* m_collisionConfiguration;
|
||||
class btCollisionDispatcher* m_dispatcher;
|
||||
class btCudaBroadphase* m_broadphase;
|
||||
class btSequentialImpulseConstraintSolver* m_constraintSolver;
|
||||
btAlignedObjectArray<class btRigidBody*> m_bulletParticles;
|
||||
|
||||
|
||||
void update(float deltaTime);
|
||||
|
||||
float* getArray(ParticleArray array);
|
||||
|
||||
int getNumParticles() const;
|
||||
|
||||
unsigned int getCurrentReadBuffer() const;
|
||||
unsigned int getColorBuffer() const;
|
||||
|
||||
void dumpGrid();
|
||||
void dumpParticles(uint start, uint count);
|
||||
|
||||
void setIterations(int i) { m_solverIterations = i; }
|
||||
|
||||
void setDamping(float x) { m_params.globalDamping = x; }
|
||||
void setGravity(float x) { m_params.gravity = make_float3(0.0f, x, 0.0f); }
|
||||
|
||||
void setCollideSpring(float x) { m_params.spring = x; }
|
||||
void setCollideDamping(float x) { m_params.damping = x; }
|
||||
void setCollideShear(float x) { m_params.shear = x; }
|
||||
void setCollideAttraction(float x) { m_params.attraction = x; }
|
||||
|
||||
void setColliderPos(float4 x) { m_params.colliderPos = x; }
|
||||
|
||||
float getParticleRadius() { return m_params.particleRadius; }
|
||||
float4 getColliderPos() { return m_params.colliderPos; }
|
||||
float getColliderRadius() { return m_params.colliderRadius; }
|
||||
uint3 getGridSize() { return m_params.gridSize; }
|
||||
float3 getWorldOrigin() { return m_params.worldOrigin; }
|
||||
float3 getCellSize() { return m_params.cellSize; }
|
||||
|
||||
void addSphere(int index, float *pos, float *vel, int r, float spacing);
|
||||
|
||||
SimulationMode getSimulationMode() const
|
||||
{
|
||||
return m_simulationMode;
|
||||
}
|
||||
|
||||
void setSimulationMode(SimulationMode mode)
|
||||
{
|
||||
m_simulationMode=mode;
|
||||
}
|
||||
|
||||
protected: // methods
|
||||
ParticleSystem()
|
||||
: m_simulationMode(SIMULATION_CUDA)
|
||||
{}
|
||||
|
||||
void initGrid(uint *size, float spacing, float jitter, uint numParticles);
|
||||
|
||||
|
||||
protected: // data
|
||||
bool m_bInitialized;
|
||||
|
||||
|
||||
|
||||
// params
|
||||
SimParams m_params;
|
||||
uint m_maxParticlesPerCell;
|
||||
|
||||
uint m_timer;
|
||||
|
||||
uint m_solverIterations;
|
||||
|
||||
SimulationMode m_simulationMode;
|
||||
};
|
||||
|
||||
#endif // __BODYSYSTEMCUDA_H__
|
||||
544
Extras/CUDA/particles.cpp
Normal file
544
Extras/CUDA/particles.cpp
Normal file
@@ -0,0 +1,544 @@
|
||||
/*
|
||||
* Copyright 1993-2007 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
/*
|
||||
Particle system example with collisions using uniform grid
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <algorithm>
|
||||
#include <math.h>
|
||||
//#include <cutil.h>
|
||||
|
||||
#include <GL/glew.h>
|
||||
|
||||
#if defined(__APPLE__) || defined(MACOSX)
|
||||
#include <GLUT/glut.h>
|
||||
#else
|
||||
#include <GL/glut.h>
|
||||
#endif
|
||||
|
||||
#include "particleSystem.h"
|
||||
#include "render_particles.h"
|
||||
#include "paramgl.h"
|
||||
|
||||
// view params
|
||||
int ox, oy;
|
||||
int buttonState = 0;
|
||||
float camera_trans[] = {0, 0, -3};
|
||||
float camera_rot[] = {0, 0, 0};
|
||||
float camera_trans_lag[] = {0, 0, -3};
|
||||
float camera_rot_lag[] = {0, 0, 0};
|
||||
const float inertia = 0.1;
|
||||
ParticleRenderer::DisplayMode displayMode = ParticleRenderer::PARTICLE_SPHERES;
|
||||
|
||||
|
||||
|
||||
int mode = 0;
|
||||
bool displayEnabled = true;
|
||||
bool bPause = false;
|
||||
bool displaySliders = false;
|
||||
bool wireframe = false;
|
||||
|
||||
enum { M_VIEW = 0, M_MOVE };
|
||||
|
||||
uint numParticles = 0;
|
||||
uint3 gridSize;
|
||||
int numIterations = 0; // run until exit
|
||||
|
||||
// simulation parameters
|
||||
float timestep = 0.5f;
|
||||
float damping = 1.0f;
|
||||
float gravity = 0.0003f;
|
||||
int iterations = 1;
|
||||
int ballr = 10;
|
||||
|
||||
float collideSpring = 0.5f;;
|
||||
float collideDamping = 0.02f;;
|
||||
float collideShear = 0.1f;
|
||||
float collideAttraction = 0.0f;
|
||||
|
||||
ParticleSystem *psystem = 0;
|
||||
|
||||
// fps
|
||||
|
||||
ParticleRenderer *renderer = 0;
|
||||
|
||||
float modelView[16];
|
||||
|
||||
ParamListGL *params;
|
||||
|
||||
extern "C" void cudaInit(int argc, char **argv);
|
||||
|
||||
void init(int numParticles, uint3 gridSize)
|
||||
{
|
||||
psystem = new ParticleSystem(numParticles, gridSize);
|
||||
psystem->reset(ParticleSystem::CONFIG_GRID);
|
||||
|
||||
renderer = new ParticleRenderer;
|
||||
renderer->setParticleRadius(psystem->getParticleRadius());
|
||||
renderer->setColorBuffer(psystem->getColorBuffer());
|
||||
|
||||
|
||||
}
|
||||
|
||||
void initGL()
|
||||
{
|
||||
glewInit();
|
||||
if (!glewIsSupported("GL_VERSION_2_0 GL_VERSION_1_5 GL_ARB_multitexture GL_ARB_vertex_buffer_object")) {
|
||||
fprintf(stderr, "Required OpenGL extensions missing.");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
glClearColor(0.25, 0.25, 0.25, 1.0);
|
||||
|
||||
glutReportErrors();
|
||||
}
|
||||
|
||||
|
||||
void display()
|
||||
{
|
||||
|
||||
// update the simulation
|
||||
if (!bPause)
|
||||
{
|
||||
psystem->setIterations(iterations);
|
||||
psystem->setDamping(damping);
|
||||
psystem->setGravity(-gravity);
|
||||
psystem->setCollideSpring(collideSpring);
|
||||
psystem->setCollideDamping(collideDamping);
|
||||
psystem->setCollideShear(collideShear);
|
||||
psystem->setCollideAttraction(collideAttraction);
|
||||
|
||||
psystem->update(timestep);
|
||||
renderer->setVertexBuffer(psystem->getCurrentReadBuffer(), psystem->getNumParticles());
|
||||
float* posArray = psystem->getArray(ParticleSystem::POSITION);
|
||||
renderer->setPositions(posArray,psystem->getNumParticles());
|
||||
}
|
||||
|
||||
// render
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
|
||||
// view transform
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glLoadIdentity();
|
||||
for (int c = 0; c < 3; ++c)
|
||||
{
|
||||
camera_trans_lag[c] += (camera_trans[c] - camera_trans_lag[c]) * inertia;
|
||||
camera_rot_lag[c] += (camera_rot[c] - camera_rot_lag[c]) * inertia;
|
||||
}
|
||||
glTranslatef(camera_trans_lag[0], camera_trans_lag[1], camera_trans_lag[2]);
|
||||
glRotatef(camera_rot_lag[0], 1.0, 0.0, 0.0);
|
||||
glRotatef(camera_rot_lag[1], 0.0, 1.0, 0.0);
|
||||
|
||||
glGetFloatv(GL_MODELVIEW_MATRIX, modelView);
|
||||
|
||||
// cube
|
||||
glColor3f(1.0, 1.0, 1.0);
|
||||
glutWireCube(2.0);
|
||||
|
||||
// collider
|
||||
glPushMatrix();
|
||||
float4 p = psystem->getColliderPos();
|
||||
glTranslatef(p.x, p.y, p.z);
|
||||
glColor3f(1.0, 0.0, 0.0);
|
||||
glutSolidSphere(psystem->getColliderRadius(), 20, 10);
|
||||
glPopMatrix();
|
||||
|
||||
if (displayEnabled)
|
||||
{
|
||||
renderer->display(displayMode);
|
||||
}
|
||||
|
||||
if (displaySliders) {
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
glBlendFunc(GL_ONE_MINUS_DST_COLOR, GL_ZERO); // invert color
|
||||
glEnable(GL_BLEND);
|
||||
params->Render(0, 0);
|
||||
glDisable(GL_BLEND);
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
}
|
||||
|
||||
psystem->debugDraw();
|
||||
|
||||
glutSwapBuffers();
|
||||
|
||||
{
|
||||
char fps[256];
|
||||
//float ifps = 1.f / (cutGetAverageTimerValue(timer) / 1000.f);
|
||||
switch (psystem->getSimulationMode())
|
||||
{
|
||||
case ParticleSystem::SIMULATION_CUDA:
|
||||
{
|
||||
sprintf(fps, "CUDA particles (%d particles)", numParticles);
|
||||
break;
|
||||
}
|
||||
case ParticleSystem::SIMULATION_BULLET_CPU:
|
||||
{
|
||||
sprintf(fps, "Bullet btCudaBroadphase (%d btSphereShapes)", numParticles);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
sprintf(fps, "Unknown simulation mode");
|
||||
}
|
||||
}
|
||||
glutSetWindowTitle(fps);
|
||||
}
|
||||
|
||||
glutReportErrors();
|
||||
}
|
||||
|
||||
void reshape(int w, int h)
|
||||
{
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glLoadIdentity();
|
||||
gluPerspective(60.0, (float) w / (float) h, 0.1, 10.0);
|
||||
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glViewport(0, 0, w, h);
|
||||
|
||||
renderer->setWindowSize(w, h);
|
||||
renderer->setFOV(60.0);
|
||||
}
|
||||
|
||||
void mouse(int button, int state, int x, int y)
|
||||
{
|
||||
int mods;
|
||||
|
||||
if (state == GLUT_DOWN)
|
||||
buttonState |= 1<<button;
|
||||
else if (state == GLUT_UP)
|
||||
buttonState = 0;
|
||||
|
||||
mods = glutGetModifiers();
|
||||
if (mods & GLUT_ACTIVE_SHIFT) {
|
||||
buttonState = 2;
|
||||
} else if (mods & GLUT_ACTIVE_CTRL) {
|
||||
buttonState = 3;
|
||||
}
|
||||
|
||||
ox = x; oy = y;
|
||||
|
||||
if (displaySliders) {
|
||||
if (params->Mouse(x, y, button, state)) {
|
||||
glutPostRedisplay();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
// transfrom vector by matrix
|
||||
void xform(float *v, float *r, GLfloat *m)
|
||||
{
|
||||
r[0] = v[0]*m[0] + v[1]*m[4] + v[2]*m[8] + m[12];
|
||||
r[1] = v[0]*m[1] + v[1]*m[5] + v[2]*m[9] + m[13];
|
||||
r[2] = v[0]*m[2] + v[1]*m[6] + v[2]*m[10] + m[14];
|
||||
}
|
||||
|
||||
// transform vector by transpose of matrix
|
||||
void ixform(float *v, float *r, GLfloat *m)
|
||||
{
|
||||
r[0] = v[0]*m[0] + v[1]*m[1] + v[2]*m[2];
|
||||
r[1] = v[0]*m[4] + v[1]*m[5] + v[2]*m[6];
|
||||
r[2] = v[0]*m[8] + v[1]*m[9] + v[2]*m[10];
|
||||
}
|
||||
|
||||
void ixformPoint(float *v, float *r, GLfloat *m)
|
||||
{
|
||||
float x[4];
|
||||
x[0] = v[0] - m[12];
|
||||
x[1] = v[1] - m[13];
|
||||
x[2] = v[2] - m[14];
|
||||
x[3] = 1.0f;
|
||||
ixform(x, r, m);
|
||||
}
|
||||
|
||||
void motion(int x, int y)
|
||||
{
|
||||
float dx, dy;
|
||||
dx = x - ox;
|
||||
dy = y - oy;
|
||||
|
||||
if (displaySliders) {
|
||||
if (params->Motion(x, y)) {
|
||||
ox = x; oy = y;
|
||||
glutPostRedisplay();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
switch(mode)
|
||||
{
|
||||
case M_VIEW:
|
||||
if (buttonState == 3) {
|
||||
// left+middle = zoom
|
||||
camera_trans[2] += (dy / 100.0) * 0.5 * fabs(camera_trans[2]);
|
||||
}
|
||||
else if (buttonState & 2) {
|
||||
// middle = translate
|
||||
camera_trans[0] += dx / 100.0;
|
||||
camera_trans[1] -= dy / 100.0;
|
||||
}
|
||||
else if (buttonState & 1) {
|
||||
// left = rotate
|
||||
camera_rot[0] += dy / 5.0;
|
||||
camera_rot[1] += dx / 5.0;
|
||||
}
|
||||
break;
|
||||
|
||||
case M_MOVE:
|
||||
{
|
||||
float translateSpeed = 0.003f;
|
||||
float4 p = psystem->getColliderPos();
|
||||
if (buttonState==1) {
|
||||
float v[3], r[3];
|
||||
v[0] = dx*translateSpeed;
|
||||
v[1] = -dy*translateSpeed;
|
||||
v[2] = 0.0f;
|
||||
ixform(v, r, modelView);
|
||||
p.x += r[0];
|
||||
p.y += r[1];
|
||||
p.z += r[2];
|
||||
} else if (buttonState==2) {
|
||||
float v[3], r[3];
|
||||
v[0] = 0.0f;
|
||||
v[1] = 0.0f;
|
||||
v[2] = dy*translateSpeed;
|
||||
ixform(v, r, modelView);
|
||||
p.x += r[0];
|
||||
p.y += r[1];
|
||||
p.z += r[2];
|
||||
}
|
||||
psystem->setColliderPos(p);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
ox = x; oy = y;
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
inline float frand()
|
||||
{
|
||||
return rand() / (float) RAND_MAX;
|
||||
}
|
||||
|
||||
// commented out to remove unused parameter warnings in Linux
|
||||
void key(unsigned char key, int /*x*/, int /*y*/)
|
||||
{
|
||||
switch (key)
|
||||
{
|
||||
case ' ':
|
||||
bPause = !bPause;
|
||||
break;
|
||||
case 13:
|
||||
psystem->update(timestep);
|
||||
renderer->setVertexBuffer(psystem->getCurrentReadBuffer(), psystem->getNumParticles());
|
||||
break;
|
||||
case '\033':
|
||||
case 'q':
|
||||
exit(0);
|
||||
break;
|
||||
case 'v':
|
||||
mode = M_VIEW;
|
||||
break;
|
||||
case 'm':
|
||||
mode = M_MOVE;
|
||||
break;
|
||||
case 's':
|
||||
psystem->setSimulationMode((ParticleSystem::SimulationMode) ((psystem->getSimulationMode() + 1) % ParticleSystem::SIMULATION_NUM_MODES));
|
||||
break;
|
||||
|
||||
case 'p':
|
||||
displayMode = (ParticleRenderer::DisplayMode) ((displayMode + 1) % ParticleRenderer::PARTICLE_NUM_MODES);
|
||||
break;
|
||||
case 'd':
|
||||
psystem->dumpGrid();
|
||||
break;
|
||||
case 'u':
|
||||
psystem->dumpParticles(0, 1);
|
||||
break;
|
||||
|
||||
case 'r':
|
||||
displayEnabled = !displayEnabled;
|
||||
break;
|
||||
|
||||
case '1':
|
||||
psystem->reset(ParticleSystem::CONFIG_GRID);
|
||||
break;
|
||||
case '2':
|
||||
psystem->reset(ParticleSystem::CONFIG_RANDOM);
|
||||
break;
|
||||
case '3':
|
||||
{
|
||||
// inject a sphere of particles
|
||||
float pr = psystem->getParticleRadius();
|
||||
float tr = pr+(pr*2.0f)*ballr;
|
||||
float pos[4], vel[4];
|
||||
pos[0] = -1.0 + tr + frand()*(2.0f - tr*2.0f);
|
||||
pos[1] = 1.0f - tr;
|
||||
pos[2] = -1.0 + tr + frand()*(2.0f - tr*2.0f);
|
||||
pos[3] = 0.0f;
|
||||
vel[0] = vel[1] = vel[2] = vel[3] = 0.0f;
|
||||
psystem->addSphere(0, pos, vel, ballr, pr*2.0f);
|
||||
}
|
||||
break;
|
||||
case '4':
|
||||
{
|
||||
// shoot ball from camera
|
||||
float pr = psystem->getParticleRadius();
|
||||
float vel[4], velw[4], pos[4], posw[4];
|
||||
vel[0] = 0.0f;
|
||||
vel[1] = 0.0f;
|
||||
vel[2] = -0.05f;
|
||||
vel[3] = 0.0f;
|
||||
ixform(vel, velw, modelView);
|
||||
|
||||
pos[0] = 0.0f;
|
||||
pos[1] = 0.0f;
|
||||
pos[2] = -2.5f;
|
||||
pos[3] = 1.0;
|
||||
ixformPoint(pos, posw, modelView);
|
||||
posw[3] = 0.0f;
|
||||
|
||||
psystem->addSphere(0, posw, velw, ballr, pr*2.0f);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'w':
|
||||
wireframe = !wireframe;
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
displaySliders = !displaySliders;
|
||||
break;
|
||||
}
|
||||
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
void special(int k, int x, int y)
|
||||
{
|
||||
if (displaySliders) {
|
||||
params->Special(k, x, y);
|
||||
}
|
||||
}
|
||||
|
||||
void idle(void)
|
||||
{
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
void initParams()
|
||||
{
|
||||
// create a new parameter list
|
||||
params = new ParamListGL("misc");
|
||||
params->AddParam(new Param<float>("time step", timestep, 0.0, 1.0, 0.01, ×tep));
|
||||
params->AddParam(new Param<int>("iterations", iterations, 0, 10, 1, &iterations));
|
||||
params->AddParam(new Param<float>("damping", damping, 0.0, 1.0, 0.001, &damping));
|
||||
params->AddParam(new Param<float>("gravity", gravity, 0.0, 0.001, 0.0001, &gravity));
|
||||
params->AddParam(new Param<int>("ball r", ballr, 1, 20, 1, &ballr));
|
||||
|
||||
params->AddParam(new Param<float>("collide spring", collideSpring, 0.0, 1.0, 0.001, &collideSpring));
|
||||
params->AddParam(new Param<float>("collide damping", collideDamping, 0.0, 0.1, 0.001, &collideDamping));
|
||||
params->AddParam(new Param<float>("collide shear", collideShear, 0.0, 0.1, 0.001, &collideShear));
|
||||
params->AddParam(new Param<float>("collide attract", collideAttraction, 0.0, 0.1, 0.001, &collideAttraction));
|
||||
}
|
||||
|
||||
void mainMenu(int i)
|
||||
{
|
||||
key((unsigned char) i, 0, 0);
|
||||
}
|
||||
|
||||
void initMenus()
|
||||
{
|
||||
glutCreateMenu(mainMenu);
|
||||
glutAddMenuEntry("Reset block [1]", '1');
|
||||
glutAddMenuEntry("Reset random [2]", '2');
|
||||
glutAddMenuEntry("Add sphere [3]", '3');
|
||||
glutAddMenuEntry("View mode [v]", 'v');
|
||||
glutAddMenuEntry("Move cursor mode [m]", 'm');
|
||||
glutAddMenuEntry("Toggle point rendering [p]", 'p');
|
||||
glutAddMenuEntry("Toggle Bullet simulation[s]", 's');
|
||||
glutAddMenuEntry("Toggle animation [ ]", ' ');
|
||||
glutAddMenuEntry("Step animation [ret]", 13);
|
||||
glutAddMenuEntry("Toggle sliders [h]", 'h');
|
||||
glutAddMenuEntry("Quit (esc)", '\033');
|
||||
glutAttachMenu(GLUT_RIGHT_BUTTON);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
numParticles =1024;//1024;//64;//16380;//32768;
|
||||
uint gridDim = 64;
|
||||
numIterations = 0;
|
||||
|
||||
gridSize.x = gridSize.y = gridSize.z = gridDim;
|
||||
printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);
|
||||
|
||||
bool benchmark = false;
|
||||
|
||||
cudaInit(argc, argv);
|
||||
|
||||
glutInit(&argc, argv);
|
||||
glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
|
||||
glutInitWindowSize(640, 480);
|
||||
glutCreateWindow("CUDA particles");
|
||||
|
||||
initGL();
|
||||
init(numParticles, gridSize);
|
||||
initParams();
|
||||
initMenus();
|
||||
|
||||
|
||||
glutDisplayFunc(display);
|
||||
glutReshapeFunc(reshape);
|
||||
glutMouseFunc(mouse);
|
||||
glutMotionFunc(motion);
|
||||
glutKeyboardFunc(key);
|
||||
glutSpecialFunc(special);
|
||||
glutIdleFunc(idle);
|
||||
|
||||
glutMainLoop();
|
||||
|
||||
|
||||
if (psystem)
|
||||
delete psystem;
|
||||
|
||||
return 0;
|
||||
}
|
||||
381
Extras/CUDA/particles_kernel.cu
Normal file
381
Extras/CUDA/particles_kernel.cu
Normal file
@@ -0,0 +1,381 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Device code.
|
||||
*/
|
||||
|
||||
#ifndef _PARTICLES_KERNEL_H_
|
||||
#define _PARTICLES_KERNEL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "cutil_math.h"
|
||||
#include "math_constants.h"
|
||||
#include "particles_kernel.cuh"
|
||||
|
||||
#if USE_TEX
|
||||
// textures for particle position and velocity
|
||||
texture<float4, 1, cudaReadModeElementType> oldPosTex;
|
||||
texture<float4, 1, cudaReadModeElementType> oldVelTex;
|
||||
|
||||
texture<uint2, 1, cudaReadModeElementType> particleHashTex;
|
||||
texture<uint, 1, cudaReadModeElementType> cellStartTex;
|
||||
|
||||
texture<uint, 1, cudaReadModeElementType> gridCountersTex;
|
||||
texture<uint, 1, cudaReadModeElementType> gridCellsTex;
|
||||
#endif
|
||||
|
||||
__constant__ SimParams params;
|
||||
|
||||
// integrate particle attributes
|
||||
__global__ void
|
||||
integrate(float4* newPos, float4* newVel,
|
||||
float4* oldPos, float4* oldVel,
|
||||
float deltaTime)
|
||||
{
|
||||
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
||||
|
||||
float4 pos4 = oldPos[index];
|
||||
float4 vel4 = oldVel[index];
|
||||
float3 pos = make_float3(pos4);
|
||||
float3 vel = make_float3(vel4);
|
||||
|
||||
vel += params.gravity * deltaTime;
|
||||
vel *= params.globalDamping;
|
||||
|
||||
// new position = old position + velocity * deltaTime
|
||||
pos += vel * deltaTime;
|
||||
|
||||
// bounce off cube sides
|
||||
if (pos.x > 1.0f - params.particleRadius) { pos.x = 1.0f - params.particleRadius; vel.x *= params.boundaryDamping; }
|
||||
if (pos.x < -1.0f + params.particleRadius) { pos.x = -1.0f + params.particleRadius; vel.x *= params.boundaryDamping;}
|
||||
if (pos.y > 1.0f - params.particleRadius) { pos.y = 1.0f - params.particleRadius; vel.y *= params.boundaryDamping; }
|
||||
if (pos.y < -1.0f + params.particleRadius) { pos.y = -1.0f + params.particleRadius; vel.y *= params.boundaryDamping;}
|
||||
if (pos.z > 1.0f - params.particleRadius) { pos.z = 1.0f - params.particleRadius; vel.z *= params.boundaryDamping; }
|
||||
if (pos.z < -1.0f + params.particleRadius) { pos.z = -1.0f + params.particleRadius; vel.z *= params.boundaryDamping;}
|
||||
|
||||
// store new position and velocity
|
||||
newPos[index] = make_float4(pos, pos4.w);
|
||||
newVel[index] = make_float4(vel, vel4.w);
|
||||
}
|
||||
|
||||
// calculate position in uniform grid
|
||||
__device__ int3 calcGridPos(float4 p)
|
||||
{
|
||||
int3 gridPos;
|
||||
gridPos.x = floor((p.x - params.worldOrigin.x) / params.cellSize.x);
|
||||
gridPos.y = floor((p.y - params.worldOrigin.y) / params.cellSize.y);
|
||||
gridPos.z = floor((p.z - params.worldOrigin.z) / params.cellSize.z);
|
||||
return gridPos;
|
||||
}
|
||||
|
||||
// calculate address in grid from position (clamping to edges)
|
||||
__device__ uint calcGridHash(int3 gridPos)
|
||||
{
|
||||
gridPos.x = max(0, min(gridPos.x, params.gridSize.x-1));
|
||||
gridPos.y = max(0, min(gridPos.y, params.gridSize.y-1));
|
||||
gridPos.z = max(0, min(gridPos.z, params.gridSize.z-1));
|
||||
return __mul24(__mul24(gridPos.z, params.gridSize.y), params.gridSize.x) + __mul24(gridPos.y, params.gridSize.x) + gridPos.x;
|
||||
}
|
||||
|
||||
// add particle to cell using atomics
|
||||
__device__ void addParticleToCell(int3 gridPos,
|
||||
uint index,
|
||||
uint* gridCounters,
|
||||
uint* gridCells)
|
||||
{
|
||||
// calculate grid hash
|
||||
uint gridHash = calcGridHash(gridPos);
|
||||
|
||||
// increment cell counter using atomics
|
||||
#if defined CUDA_NO_SM_11_ATOMIC_INTRINSICS
|
||||
int counter = 0;
|
||||
#else
|
||||
int counter = atomicAdd(&gridCounters[gridHash], 1); // returns previous value
|
||||
counter = min(counter, params.maxParticlesPerCell-1);
|
||||
#endif
|
||||
|
||||
// write particle index into this cell (very uncoalesced!)
|
||||
gridCells[gridHash*params.maxParticlesPerCell + counter] = index;
|
||||
}
|
||||
|
||||
|
||||
// update uniform grid
|
||||
__global__ void
|
||||
updateGridD(float4* pos,
|
||||
uint* gridCounters,
|
||||
uint* gridCells)
|
||||
{
|
||||
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
||||
float4 p = pos[index];
|
||||
|
||||
// get address in grid
|
||||
int3 gridPos = calcGridPos(p);
|
||||
|
||||
addParticleToCell(gridPos, index, gridCounters, gridCells);
|
||||
}
|
||||
|
||||
// calculate grid hash value for each particle
|
||||
__global__ void
|
||||
calcHashD(float4* pos,
|
||||
uint2* particleHash)
|
||||
{
|
||||
int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
|
||||
float4 p = pos[index];
|
||||
|
||||
// get address in grid
|
||||
int3 gridPos = calcGridPos(p);
|
||||
uint gridHash = calcGridHash(gridPos);
|
||||
|
||||
// store grid hash and particle index
|
||||
particleHash[index] = make_uint2(gridHash, index);
|
||||
}
|
||||
|
||||
// rearrange particle data into sorted order, and find the start of each cell in the
|
||||
// sorted hash array
|
||||
__global__ void
|
||||
reorderDataAndFindCellStartD(uint2* particleHash, // particle id sorted by hash
|
||||
float4* oldPos,
|
||||
float4* oldVel,
|
||||
float4* sortedPos,
|
||||
float4* sortedVel,
|
||||
uint* cellStart)
|
||||
{
|
||||
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
||||
|
||||
uint2 sortedData = particleHash[index];
|
||||
|
||||
// Load hash data into shared memory so that we can look
|
||||
// at neighboring particle's hash value without loading
|
||||
// two hash values per thread
|
||||
__shared__ uint sharedHash[257];
|
||||
sharedHash[threadIdx.x+1] = sortedData.x;
|
||||
if (index > 0 && threadIdx.x == 0)
|
||||
{
|
||||
// first thread in block must load neighbor particle hash
|
||||
volatile uint2 prevData = particleHash[index-1];
|
||||
sharedHash[0] = prevData.x;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
if (index == 0 || sortedData.x != sharedHash[threadIdx.x])
|
||||
{
|
||||
cellStart[sortedData.x] = index;
|
||||
}
|
||||
|
||||
// Now use the sorted index to reorder the pos and vel data
|
||||
float4 pos = FETCH(oldPos, sortedData.y); // macro does either global read or texture fetch
|
||||
float4 vel = FETCH(oldVel, sortedData.y); // see particles_kernel.cuh
|
||||
|
||||
sortedPos[index] = pos;
|
||||
sortedVel[index] = vel;
|
||||
|
||||
}
|
||||
|
||||
// collide two spheres using DEM method
|
||||
__device__ float3 collideSpheres(float4 posA, float4 posB,
|
||||
float4 velA, float4 velB,
|
||||
float radiusA, float radiusB,
|
||||
float attraction)
|
||||
{
|
||||
// calculate relative position
|
||||
float3 relPos;
|
||||
relPos.x = posB.x - posA.x;
|
||||
relPos.y = posB.y - posA.y;
|
||||
relPos.z = posB.z - posA.z;
|
||||
|
||||
float dist = length(relPos);
|
||||
float collideDist = radiusA + radiusB;
|
||||
|
||||
float3 force = make_float3(0.0f);
|
||||
if (dist < collideDist) {
|
||||
float3 norm = relPos / dist;
|
||||
|
||||
// relative velocity
|
||||
float3 relVel;
|
||||
relVel.x = velB.x - velA.x;
|
||||
relVel.y = velB.y - velA.y;
|
||||
relVel.z = velB.z - velA.z;
|
||||
|
||||
// relative tangential velocity
|
||||
float3 tanVel = relVel - (dot(relVel, norm) * norm);
|
||||
|
||||
// spring force
|
||||
force = -params.spring*(collideDist - dist) * norm;
|
||||
// dashpot (damping) force
|
||||
force += params.damping*relVel;
|
||||
// tangential shear force
|
||||
force += params.shear*tanVel;
|
||||
// attraction
|
||||
force += attraction*relPos;
|
||||
}
|
||||
|
||||
return force;
|
||||
}
|
||||
|
||||
|
||||
// collide particle with all particles in a given cell
|
||||
// version using grid built with atomics
|
||||
__device__
|
||||
float3 collideCell(int3 gridPos,
|
||||
uint index,
|
||||
float4 pos,
|
||||
float4 vel,
|
||||
float4* oldPos,
|
||||
float4* oldVel,
|
||||
uint* gridCounters,
|
||||
uint* gridCells)
|
||||
{
|
||||
float3 force = make_float3(0.0f);
|
||||
|
||||
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
|
||||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
|
||||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
|
||||
return force;
|
||||
}
|
||||
|
||||
uint gridHash = calcGridHash(gridPos);
|
||||
|
||||
// iterate over particles in this cell
|
||||
uint particlesInCell = FETCH(gridCounters, gridHash);
|
||||
particlesInCell = min(particlesInCell, params.maxParticlesPerCell-1);
|
||||
|
||||
for(uint i=0; i<particlesInCell; i++) {
|
||||
uint index2 = FETCH(gridCells, gridHash*params.maxParticlesPerCell + i);
|
||||
|
||||
if (index2 != index) { // check not colliding with self
|
||||
float4 pos2 = FETCH(oldPos, index2);
|
||||
float4 vel2 = FETCH(oldVel, index2);
|
||||
|
||||
// collide two spheres
|
||||
float3 projVec = collideSpheres(pos, pos2, vel, vel2, params.particleRadius, params.particleRadius, params.attraction);
|
||||
force += projVec;
|
||||
}
|
||||
}
|
||||
|
||||
return force;
|
||||
}
|
||||
|
||||
|
||||
// version using sorted grid
|
||||
__device__
|
||||
float3 collideCell2(int3 gridPos,
|
||||
uint index,
|
||||
float4 pos,
|
||||
float4 vel,
|
||||
float4* oldPos,
|
||||
float4* oldVel,
|
||||
uint2* particleHash,
|
||||
uint* cellStart)
|
||||
{
|
||||
float3 force = make_float3(0.0f);
|
||||
|
||||
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
|
||||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
|
||||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
|
||||
return force;
|
||||
}
|
||||
|
||||
uint gridHash = calcGridHash(gridPos);
|
||||
|
||||
// get start of bucket for this cell
|
||||
uint bucketStart = FETCH(cellStart, gridHash);
|
||||
if (bucketStart == 0xffffffff)
|
||||
return force; // cell empty
|
||||
|
||||
// iterate over particles in this cell
|
||||
for(uint i=0; i<params.maxParticlesPerCell; i++) {
|
||||
uint index2 = bucketStart + i;
|
||||
uint2 cellData = FETCH(particleHash, index2);
|
||||
if (cellData.x != gridHash) break; // no longer in same bucket
|
||||
|
||||
if (index2 != index) { // check not colliding with self
|
||||
float4 pos2 = FETCH(oldPos, index2);
|
||||
float4 vel2 = FETCH(oldVel, index2);
|
||||
|
||||
// collide two spheres
|
||||
float3 projVec = collideSpheres(pos, pos2, vel, vel2, params.particleRadius, params.particleRadius, params.attraction);
|
||||
force += projVec;
|
||||
}
|
||||
}
|
||||
|
||||
return force;
|
||||
}
|
||||
|
||||
|
||||
__global__ void
|
||||
collideD(float4* newPos, float4* newVel,
|
||||
float4* oldPos, float4* oldVel,
|
||||
#if USE_SORT
|
||||
uint2* particleHash,
|
||||
uint* cellStart
|
||||
#else
|
||||
uint* gridCounters,
|
||||
uint* gridCells
|
||||
#endif
|
||||
)
|
||||
{
|
||||
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
||||
|
||||
// read particle data from sorted arrays
|
||||
float4 pos = FETCH(oldPos, index);
|
||||
float4 vel = FETCH(oldVel, index);
|
||||
|
||||
// get address in grid
|
||||
int3 gridPos = calcGridPos(pos);
|
||||
|
||||
float3 force = make_float3(0.0f);
|
||||
|
||||
// examine only neighbouring cells
|
||||
for(int z=-1; z<=1; z++) {
|
||||
for(int y=-1; y<=1; y++) {
|
||||
for(int x=-1; x<=1; x++) {
|
||||
#if USE_SORT
|
||||
force += collideCell2(gridPos + make_int3(x, y, z), index, pos, vel, oldPos, oldVel, particleHash, cellStart);
|
||||
#else
|
||||
force += collideCell(gridPos + make_int3(x, y, z), index, pos, vel, oldPos, oldVel, gridCounters, gridCells);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float3 projVec = collideSpheres(pos, params.colliderPos, vel, make_float4(0.0f, 0.0f, 0.0f, 0.0f), params.particleRadius, params.colliderRadius, 0.0f);
|
||||
force += projVec;
|
||||
|
||||
#if USE_SORT
|
||||
// write new velocity back to original unsorted location
|
||||
volatile uint2 sortedData = particleHash[index];
|
||||
newVel[sortedData.y] = vel + make_float4(force, 0.0f);
|
||||
#else
|
||||
newVel[index] = vel + make_float4(force, 0.0f);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
47
Extras/CUDA/particles_kernel.cuh
Normal file
47
Extras/CUDA/particles_kernel.cuh
Normal file
@@ -0,0 +1,47 @@
|
||||
#ifndef PARTICLES_KERNEL_H
|
||||
#define PARTICLES_KERNEL_H
|
||||
|
||||
#define BLOCKDIM 64
|
||||
#define USE_SORT 1
|
||||
|
||||
#ifndef __DEVICE_EMULATION__
|
||||
#define USE_TEX 1
|
||||
#endif
|
||||
|
||||
#ifdef USE_TEX
|
||||
#define FETCH(t, i) tex1Dfetch(t##Tex, i)
|
||||
#else
|
||||
#define FETCH(t, i) t[i]
|
||||
#endif
|
||||
|
||||
#include "vector_types.h"
|
||||
typedef unsigned int uint;
|
||||
|
||||
struct SimParams {
|
||||
float4 colliderPos;
|
||||
float colliderRadius;
|
||||
|
||||
float3 gravity;
|
||||
float globalDamping;
|
||||
float particleRadius;
|
||||
|
||||
uint3 gridSize;
|
||||
uint numCells;
|
||||
float3 worldOrigin;
|
||||
|
||||
float3 cellSize;
|
||||
float3 worldSize;
|
||||
uint3 m_gridSize;
|
||||
|
||||
uint numBodies;
|
||||
uint maxParticlesPerCell;
|
||||
|
||||
float spring;
|
||||
float damping;
|
||||
float shear;
|
||||
float attraction;
|
||||
float boundaryDamping;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
79
Extras/CUDA/radixsort.cu
Normal file
79
Extras/CUDA/radixsort.cu
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
/* Radixsort project with key/value and arbitrary datset size support
|
||||
* which demonstrates the use of CUDA in a multi phase sorting
|
||||
* computation.
|
||||
* Host code.
|
||||
*/
|
||||
|
||||
#include "radixsort.cuh"
|
||||
#include "radixsort_kernel.cu"
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Perform a radix sort
|
||||
//! Sorting performed in place on passed arrays.
|
||||
//!
|
||||
//! @param pData0 input and output array - data will be sorted
|
||||
//! @param pData1 additional array to allow ping pong computation
|
||||
//! @param elements number of elements to sort
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits)
|
||||
{
|
||||
// Round element count to total number of threads for efficiency
|
||||
uint elements_rounded_to_3072;
|
||||
int modval = elements % 3072;
|
||||
if( modval == 0 )
|
||||
elements_rounded_to_3072 = elements;
|
||||
else
|
||||
elements_rounded_to_3072 = elements + (3072 - (modval));
|
||||
|
||||
// Iterate over n bytes of y bit word, using each byte to sort the list in turn
|
||||
for (uint shift = 0; shift < bits; shift += RADIX)
|
||||
{
|
||||
// Perform one round of radix sorting
|
||||
|
||||
// Generate per radix group sums radix counts across a radix group
|
||||
RadixSum<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, GRFSIZE>>>(pData0, elements, elements_rounded_to_3072, shift);
|
||||
// Prefix sum in radix groups, and then between groups throughout a block
|
||||
RadixPrefixSum<<<PREFIX_NUM_BLOCKS, PREFIX_NUM_THREADS_PER_BLOCK, PREFIX_GRFSIZE>>>();
|
||||
// Sum the block offsets and then shuffle data into bins
|
||||
RadixAddOffsetsAndShuffle<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK, SHUFFLE_GRFSIZE>>>(pData0, pData1, elements, elements_rounded_to_3072, shift);
|
||||
|
||||
// Exchange data pointers
|
||||
KeyValuePair* pTemp = pData0;
|
||||
pData0 = pData1;
|
||||
pData1 = pTemp;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
63
Extras/CUDA/radixsort.cuh
Normal file
63
Extras/CUDA/radixsort.cuh
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
/* Radixsort project which demonstrates the use of CUDA in a multi phase
|
||||
* sorting computation.
|
||||
* Type definitions.
|
||||
*/
|
||||
|
||||
#ifndef _RADIXSORT_H_
|
||||
#define _RADIXSORT_H_
|
||||
|
||||
#include <host_defines.h>
|
||||
|
||||
#define SYNCIT __syncthreads()
|
||||
|
||||
// Use 16 bit keys/values
|
||||
#define SIXTEEN 0
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned short ushort;
|
||||
|
||||
#if SIXTEEN
|
||||
typedef struct __align__(4) {
|
||||
ushort key;
|
||||
ushort value;
|
||||
#else
|
||||
typedef struct __align__(8) {
|
||||
uint key;
|
||||
uint value;
|
||||
#endif
|
||||
} KeyValuePair;
|
||||
|
||||
extern "C" {
|
||||
void RadixSort(KeyValuePair *pData0, KeyValuePair *pData1, uint elements, uint bits);
|
||||
}
|
||||
|
||||
#endif // #ifndef _RADIXSORT_H_
|
||||
577
Extras/CUDA/radixsort_kernel.cu
Normal file
577
Extras/CUDA/radixsort_kernel.cu
Normal file
@@ -0,0 +1,577 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
/* Radixsort project with key/value and arbitrary datset size support
|
||||
* which demonstrates the use of CUDA in a multi phase sorting
|
||||
* computation.
|
||||
* Device code.
|
||||
*/
|
||||
|
||||
#ifndef _RADIXSORT_KERNEL_H_
|
||||
#define _RADIXSORT_KERNEL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include "radixsort.cuh"
|
||||
|
||||
#define SYNCIT __syncthreads()
|
||||
|
||||
static const int NUM_SMS = 16;
|
||||
static const int NUM_THREADS_PER_SM = 192;
|
||||
static const int NUM_THREADS_PER_BLOCK = 64;
|
||||
//static const int NUM_THREADS = NUM_THREADS_PER_SM * NUM_SMS;
|
||||
static const int NUM_BLOCKS = (NUM_THREADS_PER_SM / NUM_THREADS_PER_BLOCK) * NUM_SMS;
|
||||
static const int RADIX = 8; // Number of bits per radix sort pass
|
||||
static const int RADICES = 1 << RADIX; // Number of radices
|
||||
static const int RADIXMASK = RADICES - 1; // Mask for each radix sort pass
|
||||
#if SIXTEEN
|
||||
static const int RADIXBITS = 16; // Number of bits to sort over
|
||||
#else
|
||||
static const int RADIXBITS = 32; // Number of bits to sort over
|
||||
#endif
|
||||
static const int RADIXTHREADS = 16; // Number of threads sharing each radix counter
|
||||
static const int RADIXGROUPS = NUM_THREADS_PER_BLOCK / RADIXTHREADS; // Number of radix groups per CTA
|
||||
static const int TOTALRADIXGROUPS = NUM_BLOCKS * RADIXGROUPS; // Number of radix groups for each radix
|
||||
static const int SORTRADIXGROUPS = TOTALRADIXGROUPS * RADICES; // Total radix count
|
||||
static const int GRFELEMENTS = (NUM_THREADS_PER_BLOCK / RADIXTHREADS) * RADICES;
|
||||
static const int GRFSIZE = GRFELEMENTS * sizeof(uint);
|
||||
|
||||
// Prefix sum variables
|
||||
static const int PREFIX_NUM_THREADS_PER_SM = NUM_THREADS_PER_SM;
|
||||
static const int PREFIX_NUM_THREADS_PER_BLOCK = PREFIX_NUM_THREADS_PER_SM;
|
||||
static const int PREFIX_NUM_BLOCKS = (PREFIX_NUM_THREADS_PER_SM / PREFIX_NUM_THREADS_PER_BLOCK) * NUM_SMS;
|
||||
static const int PREFIX_BLOCKSIZE = SORTRADIXGROUPS / PREFIX_NUM_BLOCKS;
|
||||
static const int PREFIX_GRFELEMENTS = PREFIX_BLOCKSIZE + 2 * PREFIX_NUM_THREADS_PER_BLOCK;
|
||||
static const int PREFIX_GRFSIZE = PREFIX_GRFELEMENTS * sizeof(uint);
|
||||
|
||||
// Shuffle variables
|
||||
static const int SHUFFLE_GRFOFFSET = RADIXGROUPS * RADICES;
|
||||
static const int SHUFFLE_GRFELEMENTS = SHUFFLE_GRFOFFSET + PREFIX_NUM_BLOCKS;
|
||||
static const int SHUFFLE_GRFSIZE = SHUFFLE_GRFELEMENTS * sizeof(uint);
|
||||
|
||||
|
||||
#define SDATA( index) CUT_BANK_CHECKER(sdata, index)
|
||||
|
||||
// Prefix sum data
|
||||
uint gRadixSum[TOTALRADIXGROUPS * RADICES];
|
||||
__device__ uint dRadixSum[TOTALRADIXGROUPS * RADICES];
|
||||
uint gRadixBlockSum[PREFIX_NUM_BLOCKS];
|
||||
__device__ uint dRadixBlockSum[PREFIX_NUM_BLOCKS];
|
||||
|
||||
extern __shared__ uint sRadixSum[];
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Perform a radix sum on the list to be sorted. Each SM holds a set of
|
||||
//! radix counters for each group of RADIXGROUPS thread in the GRF.
|
||||
//!
|
||||
//! @param pData input data
|
||||
//! @param elements total number of elements
|
||||
//! @param elements_rounded_to_3072 total number of elements rounded up to the
|
||||
//! nearest multiple of 3072
|
||||
//! @param shift the shift (0 to 24) that we are using to obtain the correct
|
||||
//! byte
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
__global__ void RadixSum(KeyValuePair *pData, uint elements, uint elements_rounded_to_3072, uint shift)
|
||||
{
|
||||
uint pos = threadIdx.x;
|
||||
|
||||
// Zero radix counts
|
||||
while (pos < GRFELEMENTS)
|
||||
{
|
||||
sRadixSum[pos] = 0;
|
||||
pos += NUM_THREADS_PER_BLOCK;
|
||||
}
|
||||
|
||||
// Sum up data
|
||||
// Source addresses computed so that each thread is reading from a block of
|
||||
// consecutive addresses so there are no conflicts between threads
|
||||
// They then loop over their combined region and the next batch works elsewhere.
|
||||
// So threads 0 to 16 work on memory 0 to 320.
|
||||
// First reading 0,1,2,3...15 then 16,17,18,19...31 and so on
|
||||
// optimising parallel access to shared memory by a thread accessing 16*threadID
|
||||
// The next radix group runs from 320 to 640 and the same applies in that region
|
||||
uint tmod = threadIdx.x % RADIXTHREADS;
|
||||
uint tpos = threadIdx.x / RADIXTHREADS;
|
||||
|
||||
// Take the rounded element list size so that all threads have a certain size dataset to work with
|
||||
// and no zero size datasets confusing the issue
|
||||
// By using a multiple of 3072 we ensure that all threads have elements
|
||||
// to work with until the last phase, at which point we individually test
|
||||
uint element_fraction = elements_rounded_to_3072 / TOTALRADIXGROUPS;
|
||||
|
||||
// Generate range
|
||||
// Note that it is possible for both pos and end to be past the end of the element set
|
||||
// which will be caught later.
|
||||
pos = (blockIdx.x * RADIXGROUPS + tpos) * element_fraction;
|
||||
uint end = pos + element_fraction;
|
||||
pos += tmod;
|
||||
//printf("pos: %d\n", pos);
|
||||
__syncthreads();
|
||||
|
||||
while (pos < end )
|
||||
{
|
||||
uint key = 0;
|
||||
|
||||
// Read first data element if we are in the set of elements
|
||||
//if( pos < elements )
|
||||
//key = pData[pos].key;
|
||||
KeyValuePair kvp;
|
||||
// Read first data element, both items at once as the memory will want to coalesce like that anyway
|
||||
if (pos < elements)
|
||||
kvp = pData[pos];
|
||||
else
|
||||
kvp.key = 0;
|
||||
key = kvp.key;
|
||||
|
||||
|
||||
// Calculate position of radix counter to increment
|
||||
// There are RADICES radices in each pass (256)
|
||||
// and hence this many counters for bin grouping
|
||||
// Multiply by RADIXGROUPS (4) to spread through memory
|
||||
// and into 4 radix groups
|
||||
uint p = ((key >> shift) & RADIXMASK) * RADIXGROUPS;
|
||||
|
||||
// Increment radix counters
|
||||
// Each radix group has its own set of counters
|
||||
// so we add the thread position [0-3], ie the group index.
|
||||
// We slow down here and take at least 16 cycles to write to the summation boxes
|
||||
// but other groups will only conflict with themselves and so can also be writing
|
||||
// 16 cycles here at least avoids retries.
|
||||
uint ppos = p + tpos;
|
||||
|
||||
// If we are past the last element we don't want to do anything
|
||||
// We do have to check each time, however, to ensure that all
|
||||
// threads sync on each sync here.
|
||||
if (tmod == 0 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 1 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 2 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 3 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 4 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 5 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 6 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 7 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 8 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 9 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 10 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 11 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 12 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 13 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 14 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
if (tmod == 15 && pos < elements)
|
||||
sRadixSum[ppos]++;
|
||||
SYNCIT;
|
||||
|
||||
pos += RADIXTHREADS;
|
||||
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
__syncthreads();
|
||||
|
||||
// Output radix sums into separate memory regions for each radix group
|
||||
// So this memory then is layed out:
|
||||
// 0...... 192..... 384 ................ 192*256
|
||||
// ie all 256 bins for each radix group
|
||||
// in there:
|
||||
// 0.............192
|
||||
// 0 4 8 12... - block idx * 4
|
||||
// And in the block boxes we see the 4 radix groups for that block
|
||||
// So 0-192 should contain bin 0 for each radix group, and so on
|
||||
uint offset = blockIdx.x * RADIXGROUPS;
|
||||
uint row = threadIdx.x / RADIXGROUPS;
|
||||
uint column = threadIdx.x % RADIXGROUPS;
|
||||
while (row < RADICES)
|
||||
{
|
||||
dRadixSum[offset + row * TOTALRADIXGROUPS + column] = sRadixSum[row * RADIXGROUPS + column];
|
||||
row += NUM_THREADS_PER_BLOCK / RADIXGROUPS;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Performs first part of parallel prefix sum - individual sums of each radix
|
||||
//! count. By the end of this we have prefix sums on a block level in dRadixSum
|
||||
//! and totals for blocks in dRadixBlockSum.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
__global__ void RadixPrefixSum()
|
||||
{
|
||||
// Read radix groups in offset by one in the GRF so a zero can be inserted at the beginning
|
||||
// and the final sum of all radix counts summed here is tacked onto the end for reading by
|
||||
// the next stage
|
||||
// Each block in this case is the full number of threads per SM (and hence the total number
|
||||
// of radix groups), 192. We should then have the total set of offsets for an entire radix
|
||||
// group by the end of this stage
|
||||
// Device mem addressing
|
||||
|
||||
uint brow = blockIdx.x * (RADICES / PREFIX_NUM_BLOCKS);
|
||||
uint drow = threadIdx.x / TOTALRADIXGROUPS; // In default parameterisation this is always 0
|
||||
uint dcolumn = threadIdx.x % TOTALRADIXGROUPS; // And similarly this is always the same as threadIdx.x
|
||||
uint dpos = (brow + drow) * TOTALRADIXGROUPS + dcolumn;
|
||||
uint end = ((blockIdx.x + 1) * (RADICES / PREFIX_NUM_BLOCKS)) * TOTALRADIXGROUPS;
|
||||
// Shared mem addressing
|
||||
uint srow = threadIdx.x / (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK);
|
||||
uint scolumn = threadIdx.x % (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK);
|
||||
uint spos = srow * (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK + 1) + scolumn;
|
||||
|
||||
// Read (RADICES / PREFIX_NUM_BLOCKS) radix counts into the GRF alongside each other
|
||||
while (dpos < end)
|
||||
{
|
||||
sRadixSum[spos] = dRadixSum[dpos];
|
||||
spos += (PREFIX_NUM_THREADS_PER_BLOCK / (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK)) *
|
||||
(PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK + 1);
|
||||
dpos += (TOTALRADIXGROUPS / PREFIX_NUM_THREADS_PER_BLOCK) * TOTALRADIXGROUPS;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// Perform preliminary sum on each thread's stretch of data
|
||||
// Each thread having a block of 16, with spacers between 0...16 18...33 and so on
|
||||
int pos = threadIdx.x * (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK + 1);
|
||||
end = pos + (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK);
|
||||
uint sum = 0;
|
||||
while (pos < end)
|
||||
{
|
||||
sum += sRadixSum[pos];
|
||||
sRadixSum[pos] = sum;
|
||||
pos++;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
|
||||
// Calculate internal offsets by performing a more traditional parallel
|
||||
// prefix sum of the topmost member of each thread's work data. Right now,
|
||||
// these are stored between the work data for each thread, allowing us to
|
||||
// eliminate GRF conflicts as well as hold the offsets needed to complete the sum
|
||||
// In other words we have:
|
||||
// 0....15 16 17....32 33 34....
|
||||
// Where this first stage updates the intermediate values (so 16=15, 33=32 etc)
|
||||
int m = (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK + 1);
|
||||
pos = threadIdx.x * (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK + 1) +
|
||||
(PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK);
|
||||
sRadixSum[pos] = sRadixSum[pos - 1];
|
||||
__syncthreads();
|
||||
// This stage then performs a parallel prefix sum (ie use powers of 2 to propagate in log n stages)
|
||||
// to update 17, 34 etc with the totals to that point (so 34 becomes [34] + [17]) and so on.
|
||||
while (m < PREFIX_NUM_THREADS_PER_BLOCK * (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK + 1))
|
||||
{
|
||||
int p = pos - m;
|
||||
uint t = ((p > 0) ? sRadixSum[p] : 0);
|
||||
__syncthreads();
|
||||
sRadixSum[pos] += t;
|
||||
__syncthreads();
|
||||
m *= 2;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
|
||||
|
||||
// Add internal offsets to each thread's work data.
|
||||
// So now we take 17 and add it to all values 18 to 33 so all offsets for that block
|
||||
// are updated.
|
||||
pos = threadIdx.x * (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK + 1);
|
||||
end = pos + (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK);
|
||||
int p = pos - 1;
|
||||
sum = ((p > 0) ? sRadixSum[p] : 0);
|
||||
while (pos < end)
|
||||
{
|
||||
sRadixSum[pos] += sum;
|
||||
pos++;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// Write summed data back out to global memory in the same way as we read it in
|
||||
// We now have prefix sum values internal to groups
|
||||
brow = blockIdx.x * (RADICES / PREFIX_NUM_BLOCKS);
|
||||
drow = threadIdx.x / TOTALRADIXGROUPS;
|
||||
dcolumn = threadIdx.x % TOTALRADIXGROUPS;
|
||||
srow = threadIdx.x / (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK);
|
||||
scolumn = threadIdx.x % (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK);
|
||||
dpos = (brow + drow) * TOTALRADIXGROUPS + dcolumn + 1;
|
||||
spos = srow * (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK + 1) + scolumn;
|
||||
end = ((blockIdx.x + 1) * RADICES / PREFIX_NUM_BLOCKS) * TOTALRADIXGROUPS;
|
||||
while (dpos < end)
|
||||
{
|
||||
dRadixSum[dpos] = sRadixSum[spos];
|
||||
dpos += (TOTALRADIXGROUPS / PREFIX_NUM_THREADS_PER_BLOCK) * TOTALRADIXGROUPS;
|
||||
spos += (PREFIX_NUM_THREADS_PER_BLOCK / (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK)) *
|
||||
(PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK + 1);
|
||||
}
|
||||
|
||||
// Write last element to summation
|
||||
// Storing block sums in a separate array
|
||||
if (threadIdx.x == 0) {
|
||||
dRadixBlockSum[blockIdx.x] = sRadixSum[PREFIX_NUM_THREADS_PER_BLOCK * (PREFIX_BLOCKSIZE / PREFIX_NUM_THREADS_PER_BLOCK + 1) - 1];
|
||||
dRadixSum[blockIdx.x * PREFIX_BLOCKSIZE] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Initially perform prefix sum of block totals to obtain final set of offsets.
|
||||
//! Then make use of radix sums to perform a shuffling of the data into the
|
||||
//! correct bins.
|
||||
//!
|
||||
//! @param pSrc input data
|
||||
//! @param pDst output data
|
||||
//! @param elements total number of elements
|
||||
//! @param shift the shift (0 to 24) that we are using to obtain the correct
|
||||
//! byte
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
__global__ void RadixAddOffsetsAndShuffle(KeyValuePair* pSrc, KeyValuePair* pDst, uint elements, uint elements_rounded_to_3072, int shift)
|
||||
{
|
||||
// Read offsets from previous blocks
|
||||
if (threadIdx.x == 0)
|
||||
sRadixSum[SHUFFLE_GRFOFFSET] = 0;
|
||||
|
||||
if (threadIdx.x < PREFIX_NUM_BLOCKS - 1)
|
||||
sRadixSum[SHUFFLE_GRFOFFSET + threadIdx.x + 1] = dRadixBlockSum[threadIdx.x];
|
||||
__syncthreads();
|
||||
|
||||
// Parallel prefix sum over block sums
|
||||
int pos = threadIdx.x;
|
||||
int n = 1;
|
||||
while (n < PREFIX_NUM_BLOCKS)
|
||||
{
|
||||
int ppos = pos - n;
|
||||
uint t0 = ((pos < PREFIX_NUM_BLOCKS) && (ppos >= 0)) ? sRadixSum[SHUFFLE_GRFOFFSET + ppos] : 0;
|
||||
__syncthreads();
|
||||
if (pos < PREFIX_NUM_BLOCKS)
|
||||
sRadixSum[SHUFFLE_GRFOFFSET + pos] += t0;
|
||||
__syncthreads();
|
||||
n *= 2;
|
||||
}
|
||||
|
||||
// Read radix count data and add appropriate block offset
|
||||
// for each radix at the memory location for this thread
|
||||
// (where the other threads in the block will be reading
|
||||
// as well, hence the large stride).
|
||||
// There is one counter box per radix group per radix
|
||||
// per block (4*256*3)
|
||||
// We use 64 threads to read the 4 radix groups set of radices
|
||||
// for the block.
|
||||
int row = threadIdx.x / RADIXGROUPS;
|
||||
int column = threadIdx.x % RADIXGROUPS;
|
||||
int spos = row * RADIXGROUPS + column;
|
||||
int dpos = row * TOTALRADIXGROUPS + column + blockIdx.x * RADIXGROUPS;
|
||||
while (spos < SHUFFLE_GRFOFFSET)
|
||||
{
|
||||
sRadixSum[spos] = dRadixSum[dpos] + sRadixSum[SHUFFLE_GRFOFFSET + dpos / (TOTALRADIXGROUPS * RADICES / PREFIX_NUM_BLOCKS)];
|
||||
spos += NUM_THREADS_PER_BLOCK;
|
||||
dpos += (NUM_THREADS_PER_BLOCK / RADIXGROUPS) * TOTALRADIXGROUPS;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
//int pos;
|
||||
// Shuffle data
|
||||
// Each of the subbins for a block should be filled via the counters, properly interleaved
|
||||
// Then, as we now iterate over each data value, we increment the subbins (each thread in the
|
||||
// radix group in turn to avoid miss writes due to conflicts) and set locations correctly.
|
||||
uint element_fraction = elements_rounded_to_3072 / TOTALRADIXGROUPS;
|
||||
int tmod = threadIdx.x % RADIXTHREADS;
|
||||
int tpos = threadIdx.x / RADIXTHREADS;
|
||||
|
||||
pos = (blockIdx.x * RADIXGROUPS + tpos) * element_fraction;
|
||||
uint end = pos + element_fraction; //(blockIdx.x * RADIXGROUPS + tpos + 1) * element_fraction;
|
||||
pos += tmod;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
while (pos < end )
|
||||
{
|
||||
KeyValuePair kvp;
|
||||
#if 1 // old load
|
||||
// Read first data element, both items at once as the memory will want to coalesce like that anyway
|
||||
if (pos < elements)
|
||||
{
|
||||
kvp = pSrc[pos];
|
||||
}
|
||||
else
|
||||
kvp.key = 0;
|
||||
|
||||
#else // casting to float2 to get it to combine loads
|
||||
int2 kvpf2;
|
||||
|
||||
// Read first data element, both items at once as the memory will want to coalesce like that anyway
|
||||
if (pos < elements)
|
||||
{
|
||||
// kvp = pSrc[pos];
|
||||
kvpf2 = ((int2*)pSrc)[pos];
|
||||
// printf("kvp: %f %f kvpf2: %f %f\n", kvp.key, kvp.value, kvpf2.x, kvpf2.y);
|
||||
}
|
||||
else
|
||||
//kvp.key = 0;
|
||||
kvpf2.x = 0;
|
||||
|
||||
kvp.key = kvpf2.x;
|
||||
kvp.value = kvpf2.y;
|
||||
#endif
|
||||
|
||||
uint index;
|
||||
|
||||
// Calculate position of radix counter to increment
|
||||
uint p = ((kvp.key >> shift) & RADIXMASK) * RADIXGROUPS;
|
||||
|
||||
// Move data, keeping counts updated.
|
||||
// Increment radix counters, relying on hexadecathread
|
||||
// warp to prevent this code from stepping all over itself.
|
||||
uint ppos = p + tpos;
|
||||
if (tmod == 0 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 1 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 2 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 3 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 4 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 5 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 6 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 7 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 8 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 9 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 10 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 11 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 12 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 13 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 14 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
if (tmod == 15 && pos < elements)
|
||||
{
|
||||
index = sRadixSum[ppos]++;
|
||||
pDst[index] = kvp;
|
||||
}
|
||||
SYNCIT;
|
||||
|
||||
pos += RADIXTHREADS;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
#endif // #ifndef _RADIXSORT_KERNEL_H_
|
||||
180
Extras/CUDA/render_particles.cpp
Normal file
180
Extras/CUDA/render_particles.cpp
Normal file
@@ -0,0 +1,180 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
#include <GL/glew.h>
|
||||
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "render_particles.h"
|
||||
#include "shaders.h"
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.1415926535897932384626433832795
|
||||
#endif
|
||||
|
||||
ParticleRenderer::ParticleRenderer()
|
||||
: m_pos(0),
|
||||
m_numParticles(0),
|
||||
m_pointSize(1.0f),
|
||||
m_particleRadius(0.125f * 0.5f),
|
||||
m_program(0),
|
||||
m_vbo(0),
|
||||
m_colorVBO(0)
|
||||
{
|
||||
_initGL();
|
||||
}
|
||||
|
||||
ParticleRenderer::~ParticleRenderer()
|
||||
{
|
||||
m_pos = 0;
|
||||
}
|
||||
|
||||
void ParticleRenderer::setPositions(float *pos, int numParticles)
|
||||
{
|
||||
m_pos = pos;
|
||||
m_numParticles = numParticles;
|
||||
}
|
||||
|
||||
void ParticleRenderer::setVertexBuffer(unsigned int vbo, int numParticles)
|
||||
{
|
||||
m_vbo = vbo;
|
||||
m_numParticles = numParticles;
|
||||
}
|
||||
|
||||
void ParticleRenderer::_drawPoints()
|
||||
{
|
||||
if (!m_vbo)
|
||||
{
|
||||
glBegin(GL_POINTS);
|
||||
{
|
||||
int k = 0;
|
||||
for (int i = 0; i < m_numParticles; ++i)
|
||||
{
|
||||
glVertex3fv(&m_pos[k]);
|
||||
k += 4;
|
||||
}
|
||||
}
|
||||
glEnd();
|
||||
}
|
||||
else
|
||||
{
|
||||
glBindBufferARB(GL_ARRAY_BUFFER_ARB, m_vbo);
|
||||
glVertexPointer(4, GL_FLOAT, 0, 0);
|
||||
glEnableClientState(GL_VERTEX_ARRAY);
|
||||
|
||||
if (m_colorVBO) {
|
||||
glBindBufferARB(GL_ARRAY_BUFFER_ARB, m_colorVBO);
|
||||
glColorPointer(4, GL_FLOAT, 0, 0);
|
||||
glEnableClientState(GL_COLOR_ARRAY);
|
||||
}
|
||||
|
||||
glDrawArrays(GL_POINTS, 0, m_numParticles);
|
||||
|
||||
glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
|
||||
glDisableClientState(GL_VERTEX_ARRAY);
|
||||
glDisableClientState(GL_COLOR_ARRAY);
|
||||
}
|
||||
}
|
||||
|
||||
void ParticleRenderer::display(DisplayMode mode /* = PARTICLE_POINTS */)
|
||||
{
|
||||
switch (mode)
|
||||
{
|
||||
case PARTICLE_POINTS:
|
||||
glColor3f(1, 1, 1);
|
||||
glPointSize(m_pointSize);
|
||||
_drawPoints();
|
||||
break;
|
||||
|
||||
default:
|
||||
case PARTICLE_SPHERES:
|
||||
glEnable(GL_POINT_SPRITE_ARB);
|
||||
glTexEnvi(GL_POINT_SPRITE_ARB, GL_COORD_REPLACE_ARB, GL_TRUE);
|
||||
glEnable(GL_VERTEX_PROGRAM_POINT_SIZE_NV);
|
||||
glDepthMask(GL_TRUE);
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
|
||||
glUseProgram(m_program);
|
||||
glUniform1f( glGetUniformLocation(m_program, "pointScale"), m_window_h / tanf(m_fov*0.5f*(float)M_PI/180.0f) );
|
||||
glUniform1f( glGetUniformLocation(m_program, "pointRadius"), m_particleRadius );
|
||||
|
||||
glColor3f(1, 1, 1);
|
||||
_drawPoints();
|
||||
|
||||
glUseProgram(0);
|
||||
glDisable(GL_POINT_SPRITE_ARB);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
GLuint
|
||||
ParticleRenderer::_compileProgram(const char *vsource, const char *fsource)
|
||||
{
|
||||
GLuint vertexShader = glCreateShader(GL_VERTEX_SHADER);
|
||||
GLuint fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
|
||||
|
||||
glShaderSource(vertexShader, 1, &vsource, 0);
|
||||
glShaderSource(fragmentShader, 1, &fsource, 0);
|
||||
|
||||
glCompileShader(vertexShader);
|
||||
glCompileShader(fragmentShader);
|
||||
|
||||
GLuint program = glCreateProgram();
|
||||
|
||||
glAttachShader(program, vertexShader);
|
||||
glAttachShader(program, fragmentShader);
|
||||
|
||||
glLinkProgram(program);
|
||||
|
||||
// check if program linked
|
||||
GLint success = 0;
|
||||
glGetProgramiv(program, GL_LINK_STATUS, &success);
|
||||
|
||||
if (!success) {
|
||||
char temp[256];
|
||||
glGetProgramInfoLog(program, 256, 0, temp);
|
||||
printf("Failed to link program:\n%s\n", temp);
|
||||
glDeleteProgram(program);
|
||||
program = 0;
|
||||
}
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
void ParticleRenderer::_initGL()
|
||||
{
|
||||
m_program = _compileProgram(vertexShader, spherePixelShader);
|
||||
|
||||
#if !defined(__APPLE__) && !defined(MACOSX)
|
||||
glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, GL_FALSE);
|
||||
glClampColorARB(GL_CLAMP_FRAGMENT_COLOR_ARB, GL_FALSE);
|
||||
#endif
|
||||
}
|
||||
78
Extras/CUDA/render_particles.h
Normal file
78
Extras/CUDA/render_particles.h
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO USER:
|
||||
*
|
||||
* This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
* OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
*
|
||||
* U.S. Government End Users. This source code is a "commercial item" as
|
||||
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
* "commercial computer software" and "commercial computer software
|
||||
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
* and is provided to the U.S. Government only as a commercial end item.
|
||||
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
* source code with only those rights set forth herein.
|
||||
*/
|
||||
|
||||
#ifndef __RENDER_PARTICLES__
|
||||
#define __RENDER_PARTICLES__
|
||||
|
||||
class ParticleRenderer
|
||||
{
|
||||
public:
|
||||
ParticleRenderer();
|
||||
~ParticleRenderer();
|
||||
|
||||
void setPositions(float *pos, int numParticles);
|
||||
void setVertexBuffer(unsigned int vbo, int numParticles);
|
||||
void setColorBuffer(unsigned int vbo) { m_colorVBO = vbo; }
|
||||
|
||||
enum DisplayMode
|
||||
{
|
||||
PARTICLE_POINTS,
|
||||
PARTICLE_SPHERES,
|
||||
PARTICLE_NUM_MODES
|
||||
};
|
||||
|
||||
void display(DisplayMode mode = PARTICLE_POINTS);
|
||||
void displayGrid();
|
||||
|
||||
void setPointSize(float size) { m_pointSize = size; }
|
||||
void setParticleRadius(float r) { m_particleRadius = r; }
|
||||
void setFOV(float fov) { m_fov = fov; }
|
||||
void setWindowSize(int w, int h) { m_window_w = w; m_window_h = h; }
|
||||
|
||||
protected: // methods
|
||||
void _initGL();
|
||||
void _drawPoints();
|
||||
GLuint _compileProgram(const char *vsource, const char *fsource);
|
||||
|
||||
protected: // data
|
||||
float *m_pos;
|
||||
int m_numParticles;
|
||||
|
||||
float m_pointSize;
|
||||
float m_particleRadius;
|
||||
float m_fov;
|
||||
int m_window_w, m_window_h;
|
||||
|
||||
GLuint m_program;
|
||||
|
||||
GLuint m_vbo;
|
||||
GLuint m_colorVBO;
|
||||
};
|
||||
|
||||
#endif //__ RENDER_PARTICLES__
|
||||
41
Extras/CUDA/shaders.cpp
Normal file
41
Extras/CUDA/shaders.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#define STRINGIFY(A) #A
|
||||
|
||||
// vertex shader
|
||||
const char *vertexShader = STRINGIFY(
|
||||
uniform float pointRadius; // point size in world space
|
||||
uniform float pointScale; // scale to calculate size in pixels
|
||||
uniform float densityScale;
|
||||
uniform float densityOffset;
|
||||
void main()
|
||||
{
|
||||
// calculate window-space point size
|
||||
vec3 posEye = vec3(gl_ModelViewMatrix * vec4(gl_Vertex.xyz, 1.0));
|
||||
float dist = length(posEye);
|
||||
gl_PointSize = pointRadius * (pointScale / dist);
|
||||
|
||||
gl_TexCoord[0] = gl_MultiTexCoord0;
|
||||
gl_Position = gl_ModelViewProjectionMatrix * vec4(gl_Vertex.xyz, 1.0);
|
||||
|
||||
gl_FrontColor = gl_Color;
|
||||
}
|
||||
);
|
||||
|
||||
// pixel shader for rendering points as shaded spheres
|
||||
const char *spherePixelShader = STRINGIFY(
|
||||
void main()
|
||||
{
|
||||
const vec3 lightDir = vec3(0.577, 0.577, 0.577);
|
||||
|
||||
// calculate normal from texture coordinates
|
||||
vec3 N;
|
||||
N.xy = gl_TexCoord[0].xy*vec2(2.0, -2.0) + vec2(-1.0, 1.0);
|
||||
float mag = dot(N.xy, N.xy);
|
||||
if (mag > 1.0) discard; // kill pixels outside circle
|
||||
N.z = sqrt(1.0-mag);
|
||||
|
||||
// calculate lighting
|
||||
float diffuse = max(0.0, dot(lightDir, N));
|
||||
|
||||
gl_FragColor = gl_Color * diffuse;
|
||||
}
|
||||
);
|
||||
2
Extras/CUDA/shaders.h
Normal file
2
Extras/CUDA/shaders.h
Normal file
@@ -0,0 +1,2 @@
|
||||
extern const char *vertexShader;
|
||||
extern const char *spherePixelShader;
|
||||
Reference in New Issue
Block a user