Added some performance measuring tools.

Speeded up collision detection significantly.
This commit is contained in:
sjbaker
2006-10-01 16:36:57 +00:00
parent 9ebc440ee5
commit a7eca49065
4 changed files with 267 additions and 71 deletions

View File

@@ -1,6 +1,7 @@
#include "GPU_physics.h"
#include "fboSupport.h"
#include "shaderSupport.h"
#include "clock.h"
#define TIMESTEP 0.016f
@@ -94,8 +95,6 @@ static FrameBufferObject *collisions ;
#define VERTS_PER_STRIP 8
#define NUM_VERTS ( NUM_CUBES * STRIPS_PER_CUBE * VERTS_PER_STRIP )
GLuint queries [ NUM_CUBES ] ;
static GLuint vbo_vx = 0 ;
static GLuint vbo_tx = 0 ;
static GLuint vbo_co = 0 ;
@@ -105,6 +104,15 @@ static float colours [ NUM_VERTS * 4 ] ;
static int starts [ NUM_CUBES * STRIPS_PER_CUBE ] ;
static int lengths [ NUM_CUBES * STRIPS_PER_CUBE ] ;
static GLuint vbo_collvx = 0 ;
static GLuint vbo_collt0 = 0 ;
static GLuint vbo_collt1 = 0 ;
static float collvertices [ NUM_CUBES * 4 * 3 ] ;
static float colltexcoords0 [ NUM_CUBES * 4 * 2 ] ;
static float colltexcoords1 [ NUM_CUBES * 4 * 2 ] ;
static int collstart ;
static int colllength ;
static int win_width = 640 ;
static int win_height = 480 ;
@@ -236,7 +244,7 @@ void initMotionTextures ()
if ( debugOpt != DRAW_WITHOUT_PHYSICS )
{
/* Random (but predominantly upwards) velocities. */
if(irand(8)==0)
if(irand(2)==0)
{
velocityData [ idToIndex(x,y) * 3 + 0 ] = frand ( 1.0f ) ;
velocityData [ idToIndex(x,y) * 3 + 1 ] = 0.0f ;
@@ -307,8 +315,6 @@ else
massSizeX -> fillTexture ( massSizeXData ) ;
collisions -> fillTexture ( collisionData ) ;
}
glGenQueriesARB ( NUM_CUBES, queries ) ;
}
@@ -405,6 +411,71 @@ void initPhysicsShaders ()
}
void initCollideVBO ()
{
float *p = collvertices ;
float *t0 = colltexcoords0 ;
float *t1 = colltexcoords1 ;
collstart = 0 ;
colllength = NUM_CUBES * 4 ;
for ( int y = 0 ; y < TEX_SIZE ; y++ )
for ( int x = 0 ; x < TEX_SIZE ; x++ )
{
/* Texcoord 0 data sets which corner of the texture this is. */
*t0++ = 0.5f /(float)TEX_SIZE ;
*t0++ = 0.5f /(float)TEX_SIZE ;
*t0++ = ((float)TEX_SIZE-0.5f)/(float)TEX_SIZE ;
*t0++ = 0.5f /(float)TEX_SIZE ;
*t0++ = ((float)TEX_SIZE-0.5f)/(float)TEX_SIZE ;
*t0++ = ((float)TEX_SIZE-0.5f)/(float)TEX_SIZE ;
*t0++ = 0.5f /(float)TEX_SIZE ;
*t0++ =((float)TEX_SIZE-0.5f)/(float)TEX_SIZE ;
/* Texcoord 1 sets which cube is which. */
*t1++ = ((float)x+0.5f)/(float)TEX_SIZE ;
*t1++ = ((float)y+0.5f)/(float)TEX_SIZE ;
*t1++ = ((float)x+0.5f)/(float)TEX_SIZE ;
*t1++ = ((float)y+0.5f)/(float)TEX_SIZE ;
*t1++ = ((float)x+0.5f)/(float)TEX_SIZE ;
*t1++ = ((float)y+0.5f)/(float)TEX_SIZE ;
*t1++ = ((float)x+0.5f)/(float)TEX_SIZE ;
*t1++ = ((float)y+0.5f)/(float)TEX_SIZE ;
*p++ = -1 ; *p++ = -1 ; *p++ = 0.0f ;
*p++ = +1 ; *p++ = -1 ; *p++ = 0.0f ;
*p++ = +1 ; *p++ = +1 ; *p++ = 0.0f ;
*p++ = -1 ; *p++ = +1 ; *p++ = 0.0f ;
}
glGenBuffersARB ( 1, & vbo_collvx ) ;
glBindBufferARB ( GL_ARRAY_BUFFER_ARB, vbo_collvx ) ;
glBufferDataARB ( GL_ARRAY_BUFFER_ARB, colllength * 3 * sizeof(float),
collvertices, GL_STATIC_DRAW_ARB ) ;
glGenBuffersARB ( 1, & vbo_collt0 ) ;
glBindBufferARB ( GL_ARRAY_BUFFER_ARB, vbo_collt0 ) ;
glBufferDataARB ( GL_ARRAY_BUFFER_ARB, colllength * 2 * sizeof(float),
colltexcoords0, GL_STATIC_DRAW_ARB ) ;
glGenBuffersARB ( 1, & vbo_collt1 ) ;
glBindBufferARB ( GL_ARRAY_BUFFER_ARB, vbo_collt1 ) ;
glBufferDataARB ( GL_ARRAY_BUFFER_ARB, colllength * 2 * sizeof(float),
colltexcoords1, GL_STATIC_DRAW_ARB ) ;
glBindBufferARB ( GL_ARRAY_BUFFER_ARB, 0 ) ;
}
void initCubeVBO ()
{
float *p = vertices ;
@@ -528,19 +599,17 @@ void drawCubesTheHardWay ()
float p1 = positionData [ 1 ] ;
float p2 = positionData [ 2 ] ;
glFlush();
restoreFrameBuffer () ;
position -> fetchTexture ( positionData ) ;
rotation -> fetchTexture ( rotationData ) ;
if ( positionData [ 0 ] == p0 &&
positionData [ 1 ] == p1 &&
positionData [ 2 ] == p2 )
{
fprintf ( stderr, "WARNING: If nothing seems to be working, you may\n"
"have an old version of the nVidia driver.\n"
"Version 76.76 is known to be bad.\n" ) ;
}
//if ( positionData [ 0 ] == p0 &&
// positionData [ 1 ] == p1 &&
// positionData [ 2 ] == p2 )
//{
// fprintf ( stderr, "WARNING: If nothing seems to be working, you may\n"
// "have an old version of the nVidia driver.\n"
// "Version 76.76 is known to be bad.\n" ) ;
//}
cubeShader -> use () ; /* Math = Cube shader */
@@ -626,7 +695,9 @@ void drawCubes ()
glMatrixMode ( GL_MODELVIEW ) ;
glLoadIdentity () ;
glTranslatef ( 1.25f, -12.5f, -60.0f ) ; // 10.0, -100.0, -500.0 ) ;
glTranslatef ( 10.0f * (float)TEX_SIZE/128.0f,
-100.0f * (float)TEX_SIZE/128.0f,
-500.0f * (float)TEX_SIZE/128.0f ) ;
glRotatef ( 20.0, 1.0, 0.0, 0.0 ) ;
glEnable ( GL_DEPTH_TEST ) ;
@@ -645,31 +716,23 @@ void drawCubes ()
void runCollisionDetection ()
{
static Clock ck ;
ck.update () ;
double tall=ck.getDeltaTime () ;
static int firsttime = true ;
static unsigned int query = 0 ;
FrameBufferObject *tmp ;
FrameBufferObject *SCM = old ;
FrameBufferObject *DCM = collisions ;
static unsigned int numHits [ NUM_CUBES ] ;
static float texCoordIdent [ NUM_CUBES * 2 ] ;
static int firsttime = true ;
unsigned int numHits ;
if ( firsttime )
{
glGenQueriesARB ( 1, & query ) ;
firsttime = false ;
for ( int y = 0 ; y < TEX_SIZE ; y++ )
for ( int x = 0 ; x < TEX_SIZE ; x++ )
{
texCoordIdent [ idToIndex ( x, y ) * 2 + 0 ] =
(((float) x) + 0.5 )/(float)TEX_SIZE ;
texCoordIdent [ idToIndex ( x, y ) * 2 + 1 ] =
(((float) y) + 0.5 )/(float)TEX_SIZE ;
}
}
/* Mark all polygons 'needed' */
memset ( numHits, 0xFF, NUM_CUBES * sizeof(unsigned int) ) ;
/* Fill SCM with big numbers */
glClearColor ( 1.0f, 1.0f, 1.0f, 1.0f ) ;
@@ -678,7 +741,23 @@ void runCollisionDetection ()
glClearColor ( 0.0f, 0.0f, 0.0f, 0.0f ) ;
force -> prepare ( true ) ; /* Zero out all of the forces. */
bool allDone ;
int numPasses = 0 ;
glPushClientAttrib ( GL_CLIENT_VERTEX_ARRAY_BIT ) ;
glClientActiveTexture( GL_TEXTURE1 ) ;
glEnableClientState ( GL_TEXTURE_COORD_ARRAY ) ;
glBindBufferARB ( GL_ARRAY_BUFFER_ARB, vbo_collt1 ) ;
glTexCoordPointer ( 2, GL_FLOAT, 0, vbo_collt1 ? NULL : colltexcoords1 ) ;
glClientActiveTexture( GL_TEXTURE0 ) ;
glEnableClientState ( GL_TEXTURE_COORD_ARRAY ) ;
glBindBufferARB ( GL_ARRAY_BUFFER_ARB, vbo_collt0 ) ;
glTexCoordPointer ( 2, GL_FLOAT, 0, vbo_collt0 ? NULL : colltexcoords0 ) ;
glEnableClientState ( GL_VERTEX_ARRAY ) ;
glBindBufferARB ( GL_ARRAY_BUFFER_ARB, vbo_collvx ) ;
glVertexPointer ( 3, GL_FLOAT, 0, vbo_collvx ? NULL : collvertices ) ;
while ( true )
{
@@ -689,48 +768,28 @@ void runCollisionDetection ()
/* Fill DCM with zeroes */
DCM -> prepare ( true ) ;
for ( int i = 0 ; i < NUM_CUBES ; i++ )
{
if ( numHits [ i ] != 0 )
{
glMultiTexCoord2fv ( GL_TEXTURE0 + 1, & ( texCoordIdent [ i * 2 ] )) ;
glBeginQueryARB ( GL_SAMPLES_PASSED_ARB, query ) ;
glBeginQueryARB ( GL_SAMPLES_PASSED_ARB, queries [ i ] ) ;
DCM -> fill () ;
glEndQueryARB ( GL_SAMPLES_PASSED_ARB ) ;
}
}
glMultiDrawArraysEXT ( GL_QUADS, (GLint*)& collstart, (GLint*)& colllength,
1 ) ;
numPasses++ ;
allDone = true ;
int numCollisionPairs = 0 ;
for ( int i = 0 ; i < NUM_CUBES ; i++ )
{
if ( numHits [ i ] == 0 ) continue ;
GLuint sampleCount ;
glGetQueryObjectuivARB ( queries[i], GL_QUERY_RESULT_ARB,
&sampleCount ) ;
numHits [ i ] = sampleCount ;
numCollisionPairs += sampleCount ;
if ( sampleCount != 0 )
allDone = false ;
}
if (numCollisionPairs > 0 )
fprintf ( stderr, "%d ", numCollisionPairs ) ;
if ( allDone )
break ;
glEndQueryARB ( GL_SAMPLES_PASSED_ARB ) ;
forceGenerator -> use () ;
forceGenerator -> applyTexture ( "position" , position , 0 ) ;
forceGenerator -> applyTexture ( "force" , force , 1 ) ;
forceGenerator -> applyTexture ( "collisions", DCM , 2 ) ;
GLuint sampleCount ;
glGetQueryObjectuivARB ( query, GL_QUERY_RESULT_ARB, &sampleCount ) ;
//fprintf ( stderr, "%d ", sampleCount ) ;
if ( sampleCount == 0 )
break ;
new_force -> paint () ;
tmp = new_force ;
@@ -741,6 +800,16 @@ fprintf ( stderr, "%d ", numCollisionPairs ) ;
DCM = SCM ;
SCM = tmp ;
}
glBindBufferARB ( GL_ARRAY_BUFFER_ARB, 0 ) ;
glPopClientAttrib () ;
ck.update () ;
double tdone=ck.getDeltaTime () ;
static int ii = 0 ;
ii++;
if (ii%100==0)
fprintf ( stderr, "Performance: %d passes %d cubes: other=%fms collisions=%fms\n", numPasses, NUM_CUBES, tall*1000.0, tdone*1000.0 ) ;
}
@@ -865,7 +934,7 @@ int main ( int argc, char **argv )
initMotionTextures () ;
initPhysicsShaders () ;
initCubeVBO () ;
initCollideVBO () ;
glutMainLoop () ;
return 0 ;
}

View File

@@ -1,10 +1,13 @@
HDRS = fboSupport.h shaderSupport.h GPU_physics.h
OBJS = GPU_physics_demo.o fboSupport.o shaderSupport.o
HDRS = fboSupport.h shaderSupport.h GPU_physics.h clock.h
OBJS = GPU_physics_demo.o fboSupport.o shaderSupport.o clock.o
all: ${OBJS}
g++ -o GPU_physics_demo ${OBJS} -lglut -lGLEW -lGL
clock.o : clock.cpp ${HDRS}
g++ -c clock.cpp
shaderSupport.o : shaderSupport.cpp ${HDRS}
g++ -c shaderSupport.cpp

View File

@@ -0,0 +1,79 @@
#include <stdio.h>
#include <stdlib.h>
#include "GPU_physics.h"
#include "clock.h"
#ifdef GPUP_CYGWIN
typedef long long _int64;
#define LARGEINTEGER _int64
#endif
#ifndef GPUP_WIN32
# include <sys/time.h>
#endif
#include <time.h>
#ifdef GPUP_WIN32

/* Definitions of the class-wide timer state declared in clock.h. */
double Clock::res ;              /* Seconds per performance-counter tick.   */
int    Clock::perf_timer = -1 ;  /* -1 = not probed, 0 = unavailable, 1 = ok */

/*
  Probe once for the Win32 performance counter.

  On the first call (perf_timer still -1) QueryPerformanceFrequency is asked
  for the counter frequency.  If it reports success, 'res' becomes the
  reciprocal of that frequency and perf_timer is set to 1; otherwise
  perf_timer is left at 0.  Later calls do nothing.
*/
void Clock::initPerformanceTimer ()
{
  if ( perf_timer != -1 )
    return ;   /* Already probed. */

  __int64 ticksPerSecond ;

  perf_timer = QueryPerformanceFrequency ( (LARGE_INTEGER *) & ticksPerSecond ) ;

  if ( perf_timer )
  {
    res        = 1.0 / (double) ticksPerSecond ;
    perf_timer = 1 ;
  }
}

#endif
/*
  Return the current raw time in seconds.

  Non-Win32 builds read gettimeofday().  Win32 builds read the performance
  counter scaled by 'res' when perf_timer is non-zero, and timeGetTime()
  (milliseconds, scaled by 0.001) otherwise.
*/
double Clock::getRawTime () const
{
#ifndef GPUP_WIN32
  timeval stamp ;

  gettimeofday ( & stamp, NULL ) ;

  const double wholeSeconds = (double) stamp.tv_sec  ;
  const double microSeconds = (double) stamp.tv_usec ;

  return wholeSeconds + microSeconds / 1000000.0 ;
#else
  /* Use Performance Timer if it's available, mmtimer if not. */
  if ( ! perf_timer )
    return (double) timeGetTime() * 0.001 ;

  __int64 ticks ;

  QueryPerformanceCounter ( (LARGE_INTEGER *) &ticks ) ;

  return res * (double) ticks ;
#endif
}
/*
  Sample the clock.

  'now' becomes the raw time minus 'start', 'delta' becomes the difference
  between this sample and the previous one, and the new sample is remembered
  in 'last_time' for the next call.
*/
void Clock::update ()
{
  const double previous = last_time ;   /* Sample taken by the prior call. */

  now       = getRawTime() - start ;
  last_time = now ;
  delta     = now - previous ;
}

45
Extras/GPUphysics/clock.h Normal file
View File

@@ -0,0 +1,45 @@
/*
  High precision clocks.

  A Clock records a raw start time when it is reset and, on every update(),
  the time elapsed since that start and since the previous update().
*/

class Clock
{
private:

  double start     ;  /* Raw time captured by reset().                    */
  double now       ;  /* Time since 'start' as of the last update().      */
  double delta     ;  /* Time between the last two update() samples.      */
  double last_time ;  /* Value of 'now' at the previous update().         */
  double max_delta ;  /* Stored by reset()/setMaxDelta(); only read back  */
                      /* through getMaxDelta() in this header.            */

#ifdef GPUP_WIN32
  static double res        ;  /* Written by initPerformanceTimer().       */
  static int    perf_timer ;  /* Written by initPerformanceTimer().       */

  void initPerformanceTimer () ;
#endif

  /* Current raw time in seconds; implemented in clock.cpp. */
  double getRawTime () const ;

public:

  /* A new clock starts out freshly reset. */
  Clock ()
  {
    reset () ;
  }

  /* Restart the clock: capture a new start time and restore the defaults. */
  void reset ()
  {
#ifdef GPUP_WIN32
    initPerformanceTimer () ;
#endif

    start     = getRawTime () ;
    max_delta = 0.2 ;
    last_time = 0.0 ;
    now       = 0.0 ;
    delta     = 0.0000001 ; /* Faked so stoopid programs won't div0 */
  }

  void setMaxDelta ( double maxDelta )
  {
    max_delta = maxDelta ;
  }

  double getMaxDelta () const
  {
    return max_delta ;
  }

  /* Take a new time sample; implemented in clock.cpp. */
  void update () ;

  /* Time since the last reset(), as of the last update(). */
  double getAbsTime () const
  {
    return now ;
  }

  /* Interval between the two most recent update() calls. */
  double getDeltaTime () const
  {
    return delta ;
  }

  /* Reciprocal of getDeltaTime(). */
  double getFrameRate () const
  {
    return 1.0 / delta ;
  }
} ;