Files
bullet3/Extras/RigidBodyGpuPipeline/opencl/integration/main.cpp

1107 lines
27 KiB
C++

// Build-time configuration of the demo.
// NUM_OBJECTS_X/Y/Z give the dimensions of the cube grid; the total instance count is their product.
// Author's note: starts crashing when more than 32700 objects on my Geforce 260, unless _USE_SUB_DATA is defined (still unstable though)
// Runs fine with fewer objects.
#define NUM_OBJECTS_X 327
#define NUM_OBJECTS_Y 20
#define NUM_OBJECTS_Z 20
//#define NUM_OBJECTS_Z 20
// When defined, InitShaders additionally re-uploads the start of the vertex buffer with glBufferSubData.
//#define _USE_SUB_DATA
// Alternative (larger) grid, kept for experimentation.
//#define NUM_OBJECTS_X 100
//#define NUM_OBJECTS_Y 100
//#define NUM_OBJECTS_Z 100
///RECREATE_CL_AND_SHADERS_ON_RESIZE will delete and re-create OpenCL and GLSL shaders/buffers at each resize (see ChangeSize)
//#define RECREATE_CL_AND_SHADERS_ON_RESIZE
///
/// OpenCL - OpenGL interop example. Updates the transforms of many cubes on the GPU, without going through main memory or the PCIe bus.
/// Create all OpenGL resources AFTER creating the OpenCL context!
///
#include <GL/glew.h>
#include <stdio.h>
#include "btGlutInclude.h"
#include "../opengl_interop/btStopwatch.h"
#include "btVector3.h"
#include "btQuaternion.h"
#include "btMatrix3x3.h"
// Accumulated rotation angle; writeTransforms() advances it by 0.01 per call and builds the orientation shared by all cubes from it.
static float angle(0);
#include <assert.h>
#ifdef _WIN32
#include <windows.h>
#endif
//OpenCL stuff
#include "../basic_initialize/btOpenCLUtils.h"
#include "../opengl_interop/btOpenCLGLInteropBuffer.h"
// OpenCL state shared by the whole demo; created in InitCL(), released in DeleteCL().
cl_context g_cxMainContext;
cl_command_queue g_cqCommandQue;
cl_device_id g_device;
// Local work size used when launching the interop kernel (see updatePos).
static const size_t workGroupSize = 128;
// Device buffers holding per-object linear and angular velocities; filled in main().
cl_mem gLinVelMem;
cl_mem gAngVelMem;
// Wraps the OpenGL vertex buffer (cube_vbo) so that it can be used from OpenCL.
btOpenCLGLInteropBuffer* g_interopBuffer = 0;
// Kernel compiled from interopKernelString under the name "interopKernel".
cl_kernel g_interopKernel;
////for Adl
#include <Adl/Adl.h>
adl::DeviceCL* g_deviceCL=0;
// Runtime toggles, flipped from Keyboard(): 'c' = CPU vs. OpenCL update, 's' = print timings, 'k' = run/skip the kernel.
bool useCPU = false;
bool printStats = false;
bool runOpenCLKernels = true;
// The OpenCL source is pulled in as the initializer expression of interopKernelString.
// NOTE(review): presumably integrateKernel.cl wraps its contents in MSTRINGIFY(...) and terminates the statement - verify against that file.
#define MSTRINGIFY(A) #A
static char* interopKernelString =
#include "integrateKernel.cl"
// Timer read and reset by RenderScene() around updatePos().
btStopwatch gStopwatch;
// Current window size, updated by ChangeSize() and read by updateCamera().
int m_glutScreenWidth = 640;
int m_glutScreenHeight= 480;
// true = orthographic projection, false = perspective; toggled with 'o'.
bool m_ortho = false;
static GLuint instancingShader; // The instancing renderer
// Vertex array object, vertex buffer (cube geometry + per-instance data) and index buffer of the cube.
static GLuint cube_vao;
static GLuint cube_vbo;
static GLuint index_vbo;
// Checkerboard texture created once in myinit().
static GLuint m_texturehandle;
// Set when ESC is pressed (see Keyboard); not read anywhere else in the visible part of this file.
static bool done = false;
// Uniform locations queried from instancingShader in InitShaders().
static GLint angle_loc = 0;
static GLint ModelViewMatrix;
static GLint ProjectionMatrix;
static GLint uniform_texture_diffuse = 0;
//used for dynamic loading from disk (default switched off)
// Scratch buffer that gltLoadShaderFile() reads a shader file into; MAX_SHADER_LENGTH is its size in bytes.
#define MAX_SHADER_LENGTH 8192
static GLubyte shaderText[MAX_SHADER_LENGTH];
// Embedded GLSL 3.30 vertex shader of the instancing renderer (used when gltLoadShaderPair is called with loadFromFile == false).
// Per vertex it rotates position.xyz and the normal by the per-instance quaternion (location 2), adds the
// per-instance position (location 1) and applies ModelViewMatrix and ProjectionMatrix.
// It forwards the texture coordinates, the rotated normal and a fixed light direction to the fragment stage.
static const char* vertexShader= \
"#version 330\n"
"precision highp float;\n"
"\n"
"\n"
"\n"
"layout (location = 0) in vec4 position;\n"
"layout (location = 1) in vec4 instance_position;\n"
"layout (location = 2) in vec4 instance_quaternion;\n"
"layout (location = 3) in vec2 uvcoords;\n"
"layout (location = 4) in vec3 vertexnormal;\n"
"\n"
"\n"
"uniform float angle = 0.0;\n"
"uniform mat4 ModelViewMatrix;\n"
"uniform mat4 ProjectionMatrix;\n"
"\n"
"out Fragment\n"
"{\n"
" vec4 color;\n"
"} fragment;\n"
"\n"
"out Vert\n"
"{\n"
" vec2 texcoord;\n"
"} vert;\n"
"\n"
"\n"
"vec4 quatMul ( in vec4 q1, in vec4 q2 )\n"
"{\n"
" vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n"
" vec4 dt = q1 * q2;\n"
" float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n"
" return vec4 ( im, re );\n"
"}\n"
"\n"
"vec4 quatFromAxisAngle(vec4 axis, in float angle)\n"
"{\n"
" float cah = cos(angle*0.5);\n"
" float sah = sin(angle*0.5);\n"
" float d = inversesqrt(dot(axis,axis));\n"
" vec4 q = vec4(axis.x*sah*d,axis.y*sah*d,axis.z*sah*d,cah);\n"
" return q;\n"
"}\n"
"//\n"
"// vector rotation via quaternion\n"
"//\n"
"vec4 quatRotate3 ( in vec3 p, in vec4 q )\n"
"{\n"
" vec4 temp = quatMul ( q, vec4 ( p, 0.0 ) );\n"
" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n"
"}\n"
"vec4 quatRotate ( in vec4 p, in vec4 q )\n"
"{\n"
" vec4 temp = quatMul ( q, p );\n"
" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n"
"}\n"
"\n"
"out vec3 lightDir,normal,ambient;\n"
"\n"
"void main(void)\n"
"{\n"
" vec4 q = instance_quaternion;\n"
" ambient = vec3(0.2,0.2,0.2);\n"
" \n"
" \n"
" vec4 local_normal = (quatRotate3( vertexnormal,q));\n"
" vec3 light_pos = vec3(1000,1000,1000);\n"
" normal = normalize(ModelViewMatrix * local_normal).xyz;\n"
"\n"
" lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n"
"// lightDir = normalize(vec3(gl_LightSource[0].position));\n"
" \n"
" vec4 axis = vec4(1,1,1,0);\n"
" vec4 localcoord = quatRotate3( position.xyz,q);\n"
" vec4 vertexPos = ProjectionMatrix * ModelViewMatrix *(instance_position+localcoord);\n"
"\n"
" gl_Position = vertexPos;\n"
" \n"
"// fragment.color = instance_color;\n"
" vert.texcoord = uvcoords;\n"
"}\n"
;
// Embedded GLSL 3.30 fragment shader of the instancing renderer (used when gltLoadShaderPair is called with loadFromFile == false).
// main() samples the Diffuse texture and modulates its colour with a simple diffuse term
// max(dot(lightDir, normal), 0); alpha is taken from the texel.
// The texture lookups use texture() rather than texture2D(): texture2D is a deprecated built-in that is
// only available in the compatibility profile, so strict "#version 330" compilers reject it.
static const char* fragmentShader= \
"#version 330\n"
"precision highp float;\n"
"\n"
"in Fragment\n"
"{\n"
" vec4 color;\n"
"} fragment;\n"
"\n"
"in Vert\n"
"{\n"
" vec2 texcoord;\n"
"} vert;\n"
"\n"
"uniform sampler2D Diffuse;\n"
"\n"
"in vec3 lightDir,normal,ambient;\n"
"\n"
"out vec4 color;\n"
"\n"
"void main_textured(void)\n"
"{\n"
" color = texture(Diffuse,vert.texcoord);//fragment.color;\n"
"}\n"
"\n"
"void main(void)\n"
"{\n"
" vec4 texel = texture(Diffuse,vert.texcoord);//fragment.color;\n"
" vec3 ct,cf;\n"
" float intensity,at,af;\n"
" intensity = max(dot(lightDir,normalize(normal)),0.0);\n"
" cf = intensity*vec3(1.0,1.0,1.0);//intensity * (gl_FrontMaterial.diffuse).rgb+ambient;//gl_FrontMaterial.ambient.rgb;\n"
" af = 1.0;\n"
" \n"
" ct = texel.rgb;\n"
" at = texel.a;\n"
" \n"
" color = vec4(ct * cf, at * af); \n"
"}\n"
;
// Load the shader from the source text
void gltLoadShaderSrc(const char *szShaderSrc, GLuint shader)
{
GLchar *fsStringPtr[1];
fsStringPtr[0] = (GLchar *)szShaderSrc;
glShaderSource(shader, 1, (const GLchar **)fsStringPtr, NULL);
}
////////////////////////////////////////////////////////////////
// Load the shader source from the specified file into the shared
// shaderText scratch buffer and attach it to 'shader'.
//   szFile - path of the shader file
//   shader - shader object that receives the source
// Returns false if the file cannot be opened or does not fit into
// shaderText (MAX_SHADER_LENGTH bytes including the terminating zero).
bool gltLoadShaderFile(const char *szFile, GLuint shader)
{
	FILE *fp = fopen(szFile, "r");
	if (fp == NULL)
		return false;

	// See how long the file is
	GLint shaderLength = 0;
	while (fgetc(fp) != EOF)
		shaderLength++;

	// One byte must stay free for the terminator, hence '>=' and not '>'.
	if (shaderLength >= MAX_SHADER_LENGTH)
	{
		fprintf(stderr, "Shader file %s is too large (%d bytes, limit %d)\n", szFile, (int)shaderLength, MAX_SHADER_LENGTH - 1);
		fclose(fp);
		return false;
	}

	// Go back to the beginning and read the whole file in
	rewind(fp);
	const size_t bytesRead = fread(shaderText, 1, shaderLength, fp);
	fclose(fp);

	// Terminate after what was actually read, so a short read never exposes stale buffer contents.
	shaderText[bytesRead] = '\0';

	// Load the string
	gltLoadShaderSrc((const char *)shaderText, shader);
	return true;
}
/////////////////////////////////////////////////////////////////
// Load a pair of shaders, compile, and link together. Specify the complete
// file path for each shader. Note, there is no support for
// just loading say a vertex program... you have to do both.
GLuint gltLoadShaderPair(const char *szVertexProg, const char *szFragmentProg, bool loadFromFile)
{
// Temporary Shader objects
GLuint hVertexShader;
GLuint hFragmentShader;
GLuint hReturn = 0;
GLint testVal;
// Create shader objects
hVertexShader = glCreateShader(GL_VERTEX_SHADER);
hFragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
if (loadFromFile)
{
if(gltLoadShaderFile(szVertexProg, hVertexShader) == false)
{
glDeleteShader(hVertexShader);
glDeleteShader(hFragmentShader);
return (GLuint)NULL;
}
if(gltLoadShaderFile(szFragmentProg, hFragmentShader) == false)
{
glDeleteShader(hVertexShader);
glDeleteShader(hFragmentShader);
return (GLuint)NULL;
}
} else
{
gltLoadShaderSrc(vertexShader, hVertexShader);
gltLoadShaderSrc(fragmentShader, hFragmentShader);
}
// Compile them
glCompileShader(hVertexShader);
glCompileShader(hFragmentShader);
// Check for errors
glGetShaderiv(hVertexShader, GL_COMPILE_STATUS, &testVal);
if(testVal == GL_FALSE)
{
char temp[256] = "";
glGetShaderInfoLog( hVertexShader, 256, NULL, temp);
fprintf( stderr, "Compile failed:\n%s\n", temp);
assert(0);
exit(0);
glDeleteShader(hVertexShader);
glDeleteShader(hFragmentShader);
return (GLuint)NULL;
}
glGetShaderiv(hFragmentShader, GL_COMPILE_STATUS, &testVal);
if(testVal == GL_FALSE)
{
char temp[256] = "";
glGetShaderInfoLog( hFragmentShader, 256, NULL, temp);
fprintf( stderr, "Compile failed:\n%s\n", temp);
assert(0);
exit(0);
glDeleteShader(hVertexShader);
glDeleteShader(hFragmentShader);
return (GLuint)NULL;
}
// Link them - assuming it works...
hReturn = glCreateProgram();
glAttachShader(hReturn, hVertexShader);
glAttachShader(hReturn, hFragmentShader);
glLinkProgram(hReturn);
// These are no longer needed
glDeleteShader(hVertexShader);
glDeleteShader(hFragmentShader);
// Make sure link worked too
glGetProgramiv(hReturn, GL_LINK_STATUS, &testVal);
if(testVal == GL_FALSE)
{
glDeleteProgram(hReturn);
return (GLuint)NULL;
}
return hReturn;
}
/// Cube geometry: 24 vertices (4 per face), 9 floats each.
/// Layout per vertex: position xyz, unused w (the vertex shader only reads position.xyz), normal xyz, uv.
/// RenderScene addresses this with a stride of 9*sizeof(float): normal at float offset 4, uv at float offset 7.
static const GLfloat cube_vertices[] =
{
// face with normal +Z
-1.0f, -1.0f, 1.0f, 0.0f, 0,0,1, 0,0,//0
1.0f, -1.0f, 1.0f, 0.0f, 0,0,1, 1,0,//1
1.0f, 1.0f, 1.0f, 0.0f, 0,0,1, 1,1,//2
-1.0f, 1.0f, 1.0f, 0.0f, 0,0,1, 0,1 ,//3
// face with normal -Z
-1.0f, -1.0f, -1.0f, 1.0f, 0,0,-1, 0,0,//4
1.0f, -1.0f, -1.0f, 1.0f, 0,0,-1, 1,0,//5
1.0f, 1.0f, -1.0f, 1.0f, 0,0,-1, 1,1,//6
-1.0f, 1.0f, -1.0f, 1.0f, 0,0,-1, 0,1,//7
// face with normal -X (vertices 8-11)
-1.0f, -1.0f, -1.0f, 1.0f, -1,0,0, 0,0,
-1.0f, 1.0f, -1.0f, 1.0f, -1,0,0, 1,0,
-1.0f, 1.0f, 1.0f, 1.0f, -1,0,0, 1,1,
-1.0f, -1.0f, 1.0f, 1.0f, -1,0,0, 0,1,
// face with normal +X (vertices 12-15)
1.0f, -1.0f, -1.0f, 1.0f, 1,0,0, 0,0,
1.0f, 1.0f, -1.0f, 1.0f, 1,0,0, 1,0,
1.0f, 1.0f, 1.0f, 1.0f, 1,0,0, 1,1,
1.0f, -1.0f, 1.0f, 1.0f, 1,0,0, 0,1,
// face with normal -Y (vertices 16-19)
-1.0f, -1.0f, -1.0f, 1.0f, 0,-1,0, 0,0,
-1.0f, -1.0f, 1.0f, 1.0f, 0,-1,0, 1,0,
1.0f, -1.0f, 1.0f, 1.0f, 0,-1,0, 1,1,
1.0f,-1.0f, -1.0f, 1.0f, 0,-1,0, 0,1,
// face with normal +Y (vertices 20-23)
-1.0f, 1.0f, -1.0f, 1.0f, 0,1,0, 0,0,
-1.0f, 1.0f, 1.0f, 1.0f, 0,1,0, 1,0,
1.0f, 1.0f, 1.0f, 1.0f, 0,1,0, 1,1,
1.0f,1.0f, -1.0f, 1.0f, 0,1,0, 0,1,
};
// Index list for the cube: two triangles per face, six faces, referring to cube_vertices.
// Drawn by RenderScene as GL_TRIANGLES with GL_UNSIGNED_INT indices.
static const int cube_indices[]=
{
0,1,2,0,2,3,//+Z face (vertices 0-3)
4,5,6,4,6,7,//-Z face (vertices 4-7)
8,9,10,8,10,11,//-X face
12,13,14,12,14,15,//+X face
16,17,18,16,18,19,//-Y face
20,21,22,20,22,23//+Y face
};
// Release the OpenCL objects created by InitCL().
// The command queue is released before the context it was created from, and both
// handles are cleared so that a repeated call (or a failed re-initialisation) does not
// release or use a stale handle.
void DeleteCL()
{
	if (g_cqCommandQue)
	{
		clReleaseCommandQueue(g_cqCommandQue);
		g_cqCommandQue = 0;
	}
	if (g_cxMainContext)
	{
		clReleaseContext(g_cxMainContext);
		g_cxMainContext = 0;
	}
}
void InitCL()
{
void* glCtx=0;
void* glDC = 0;
#ifdef _WIN32
glCtx = wglGetCurrentContext();
#else //!_WIN32
GLXContext glCtx = glXGetCurrentContext();
#endif //!_WIN32
glDC = wglGetCurrentDC();
int ciErrNum = 0;
cl_device_type deviceType = CL_DEVICE_TYPE_ALL;//GPU;
g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);
if (numDev>0)
{
g_device= btOpenCLUtils::getDevice(g_cxMainContext,0);
btOpenCLDeviceInfo clInfo;
btOpenCLUtils::getDeviceInfo(g_device,clInfo);
btOpenCLUtils::printDeviceInfo(g_device);
// create a command-queue
g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
//normally you would create and execute kernels using this command queue
}
}
// Total number of cube instances.
#define NUM_OBJECTS (NUM_OBJECTS_X*NUM_OBJECTS_Y*NUM_OBJECTS_Z)
// Size in bytes of the per-instance position and orientation arrays (4 floats per object each).
// In the vertex buffer they follow cube_vertices: first all positions, then all orientations.
#define POSITION_BUFFER_SIZE (NUM_OBJECTS*sizeof(float)*4)
#define ORIENTATION_BUFFER_SIZE (NUM_OBJECTS*sizeof(float)*4)
// Host-side copies of the per-instance data, allocated in InitShaders().
GLfloat* instance_positions_ptr = 0;
GLfloat* instance_quaternion_ptr = 0;
void DeleteShaders()
{
glDeleteVertexArrays(1, &cube_vao);
glDeleteBuffers(1,&index_vbo);
glDeleteBuffers(1,&cube_vbo);
glDeleteProgram(instancingShader);
}
void writeTransforms()
{
glFlush();
char* bla = (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
float* positions = (float*)(bla+sizeof(cube_vertices));
float* orientations = (float*)(bla+sizeof(cube_vertices) + POSITION_BUFFER_SIZE);
// positions[0]+=0.001f;
static int offset=0;
//offset++;
static btVector3 axis(1,0,0);
angle += 0.01f;
int index=0;
btQuaternion orn(axis,angle);
for (int i=0;i<NUM_OBJECTS_X;i++)
{
for (int j=0;j<NUM_OBJECTS_Y;j++)
{
for (int k=0;k<NUM_OBJECTS_Z;k++)
{
//if (!((index+offset)%15))
{
instance_positions_ptr[index*4+1]-=0.01f;
positions[index*4]=instance_positions_ptr[index*4];
positions[index*4+1]=instance_positions_ptr[index*4+1];
positions[index*4+2]=instance_positions_ptr[index*4+2];
positions[index*4+3]=instance_positions_ptr[index*4+3];
orientations[index*4] = orn[0];
orientations[index*4+1] = orn[1];
orientations[index*4+2] = orn[2];
orientations[index*4+3] = orn[3];
}
// memcpy((void*)&orientations[index*4],orn,sizeof(btQuaternion));
index++;
}
}
}
glUnmapBuffer( GL_ARRAY_BUFFER);
//if this glFinish is removed, the animation is not always working/blocks
//@todo: figure out why
glFlush();
}
// Build the instancing shader program and all OpenGL buffers of the demo:
//  - compiles/links the embedded shaders and looks up their uniform locations,
//  - allocates the host-side instance arrays and lays the cubes out on a regular grid,
//  - creates cube_vbo holding [cube_vertices | positions | orientations],
//  - creates the (empty) vertex array object and the index buffer.
// The attribute pointers themselves are set up each frame in RenderScene().
void InitShaders()
{
	const bool loadFromFile = false;
	// gltLoadShaderPair already links the program, so no second glLinkProgram is needed here.
	instancingShader = gltLoadShaderPair("instancing.vs","instancing.fs", loadFromFile);
	if (!instancingShader)
		fprintf(stderr, "InitShaders: could not build the instancing shader program\n");
	glUseProgram(instancingShader);
	angle_loc = glGetUniformLocation(instancingShader, "angle");
	ModelViewMatrix = glGetUniformLocation(instancingShader, "ModelViewMatrix");
	ProjectionMatrix = glGetUniformLocation(instancingShader, "ProjectionMatrix");
	uniform_texture_diffuse = glGetUniformLocation(instancingShader, "Diffuse");

	glGenBuffers(1, &cube_vbo);
	glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);

	instance_positions_ptr = (GLfloat*)new float[NUM_OBJECTS*4];
	instance_quaternion_ptr = (GLfloat*)new float[NUM_OBJECTS*4];

	// Regular grid with a spacing of 10 units, centred in x and y; identity orientation.
	int index=0;
	for (int i=0;i<NUM_OBJECTS_X;i++)
	{
		for (int j=0;j<NUM_OBJECTS_Y;j++)
		{
			for (int k=0;k<NUM_OBJECTS_Z;k++)
			{
				instance_positions_ptr[index*4]=-(i-NUM_OBJECTS_X/2)*10;
				instance_positions_ptr[index*4+1]=-(j-NUM_OBJECTS_Y/2)*10;
				instance_positions_ptr[index*4+2]=-k*10;
				instance_positions_ptr[index*4+3]=1;
				instance_quaternion_ptr[index*4]=0;
				instance_quaternion_ptr[index*4+1]=0;
				instance_quaternion_ptr[index*4+2]=0;
				instance_quaternion_ptr[index*4+3]=1;
				index++;
			}
		}
	}

	// Assemble the complete buffer contents in a temporary staging block.
	const size_t size = sizeof(cube_vertices) + POSITION_BUFFER_SIZE+ORIENTATION_BUFFER_SIZE;
	char* staging = (char*)malloc(size);
	if (!staging)
	{
		fprintf(stderr, "InitShaders: out of memory allocating %lu bytes\n", (unsigned long)size);
		return;
	}
	memcpy(staging,&cube_vertices[0],sizeof(cube_vertices));
	memcpy(staging+sizeof(cube_vertices),instance_positions_ptr,POSITION_BUFFER_SIZE);
	memcpy(staging+sizeof(cube_vertices)+POSITION_BUFFER_SIZE,instance_quaternion_ptr,ORIENTATION_BUFFER_SIZE);
	glBufferData(GL_ARRAY_BUFFER, size, staging, GL_DYNAMIC_DRAW);//GL_STATIC_DRAW);
	///initialize parts of the buffer
#ifdef _USE_SUB_DATA
	glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(cube_vertices)+ 16384, staging);//cube_vertices);
#endif
	// The GL has copied the data by now; the staging block was previously leaked.
	free(staging);

	char* dest= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_WRITE_ONLY);//GL_WRITE_ONLY
	if (dest)
	{
		memcpy(dest,cube_vertices,sizeof(cube_vertices));
		//memcpy(dest+sizeof(cube_vertices),instance_colors,sizeof(instance_colors));
		glUnmapBuffer( GL_ARRAY_BUFFER);
	}
	writeTransforms();
	/*
	glBufferSubData(GL_ARRAY_BUFFER, sizeof(cube_vertices) + sizeof(instance_colors), POSITION_BUFFER_SIZE, instance_positions_ptr);
	glBufferSubData(GL_ARRAY_BUFFER, sizeof(cube_vertices) + sizeof(instance_colors)+POSITION_BUFFER_SIZE,ORIENTATION_BUFFER_SIZE , instance_quaternion_ptr);
	*/

	glGenVertexArrays(1, &cube_vao);
	glBindVertexArray(cube_vao);
	glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
	glBindVertexArray(0);

	glGenBuffers(1, &index_vbo);
	const int indexBufferSize = sizeof(cube_indices);
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_vbo);
	glBufferData(GL_ELEMENT_ARRAY_BUFFER, indexBufferSize, NULL, GL_STATIC_DRAW);
	glBufferSubData(GL_ELEMENT_ARRAY_BUFFER,0,indexBufferSize,cube_indices);

	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
	glBindBuffer(GL_ARRAY_BUFFER,0);
	glBindVertexArray(0);
}
// Set up the fixed-function projection and modelview matrices for the current window size.
// In orthographic mode (m_ortho) the view volume is centred on the camera target; otherwise a
// perspective frustum is used together with gluLookAt. The aspect ratio always widens the
// longer window axis, so the image is not stretched in either landscape or portrait windows.
// Leaves the projection as identity and returns early if the window has a zero dimension.
void updateCamera() {
	btVector3 m_cameraPosition(12,20,12);
	btVector3 m_cameraTargetPosition(0,10,0);
	btVector3 m_cameraUp(0,1,0);
	btScalar m_cameraDistance = 130;
	const float m_frustumZNear=1;
	const float m_frustumZFar=1000;

	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();

	// Either dimension being zero would make the aspect ratio below divide by zero.
	if (m_glutScreenWidth == 0 || m_glutScreenHeight == 0)
		return;

	const bool landscape = m_glutScreenWidth > m_glutScreenHeight;
	float aspect;
	btVector3 extents;
	if (landscape)
	{
		aspect = m_glutScreenWidth / (float)m_glutScreenHeight;
		extents.setValue(aspect * 1.0f, 1.0f,0);
	} else
	{
		aspect = m_glutScreenHeight / (float)m_glutScreenWidth;
		extents.setValue(1.0f, aspect*1.f,0);
	}

	if (m_ortho)
	{
		// reset matrix
		glLoadIdentity();
		extents *= m_cameraDistance;
		btVector3 lower = m_cameraTargetPosition - extents;
		btVector3 upper = m_cameraTargetPosition + extents;
		glOrtho(lower.getX(), upper.getX(), lower.getY(), upper.getY(),-1000,1000);
		glMatrixMode(GL_MODELVIEW);
		glLoadIdentity();
	} else
	{
		if (landscape)
		{
			glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar);
		} else
		{
			// Portrait: aspect is height/width here, so it has to scale the vertical extent.
			glFrustum (-m_frustumZNear, m_frustumZNear, -aspect * m_frustumZNear, aspect * m_frustumZNear, m_frustumZNear, m_frustumZFar);
		}
		glMatrixMode(GL_MODELVIEW);
		glLoadIdentity();
		gluLookAt(m_cameraPosition[0], m_cameraPosition[1], m_cameraPosition[2],
			m_cameraTargetPosition[0], m_cameraTargetPosition[1], m_cameraTargetPosition[2],
			m_cameraUp.getX(),m_cameraUp.getY(),m_cameraUp.getZ());
	}
}
// Set the fixed-function render state used every frame: two directional lights, smooth
// shading, depth testing, the clear colour and the checkerboard texture. The texture is
// generated and uploaded only on the first call; later calls just re-bind it.
void myinit()
{
	// GLfloat light_ambient[] = { btScalar(0.2), btScalar(0.2), btScalar(0.2), btScalar(1.0) };
	GLfloat light_ambient[] = { btScalar(1.0), btScalar(1.2), btScalar(0.2), btScalar(1.0) };
	GLfloat light_diffuse[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0) };
	GLfloat light_specular[] = { btScalar(1.0), btScalar(1.0), btScalar(1.0), btScalar(1.0 )};
	/* light_position is NOT default value */
	GLfloat light_position0[] = { btScalar(1000.0), btScalar(1000.0), btScalar(1000.0), btScalar(0.0 )};
	GLfloat light_position1[] = { btScalar(-1.0), btScalar(-10.0), btScalar(-1.0), btScalar(0.0) };

	glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient);
	glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse);
	glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular);
	glLightfv(GL_LIGHT0, GL_POSITION, light_position0);

	glLightfv(GL_LIGHT1, GL_AMBIENT, light_ambient);
	glLightfv(GL_LIGHT1, GL_DIFFUSE, light_diffuse);
	glLightfv(GL_LIGHT1, GL_SPECULAR, light_specular);
	glLightfv(GL_LIGHT1, GL_POSITION, light_position1);

	glEnable(GL_LIGHTING);
	glEnable(GL_LIGHT0);
	glEnable(GL_LIGHT1);

	// glShadeModel(GL_FLAT);//GL_SMOOTH);
	glShadeModel(GL_SMOOTH);
	glEnable(GL_DEPTH_TEST);
	glDepthFunc(GL_LESS);
	glClearColor(float(0.7),float(0.7),float(0.7),float(0));

	static bool m_textureenabled = true;
	static bool m_textureinitialized = false;
	if(m_textureenabled)
	{
		if(!m_textureinitialized)
		{
			glActiveTexture(GL_TEXTURE0);

			// 256x256 RGB checkerboard with 32-pixel squares: red is always 255,
			// green/blue alternate between 180 and 255.
			GLubyte* image=new GLubyte[256*256*3];
			for(int y=0;y<256;++y)
			{
				const int t=y>>5;
				GLubyte* pi=image+y*256*3;
				for(int x=0;x<256;++x)
				{
					const int s=x>>5;
					const GLubyte b=180;
					GLubyte c=b+((s+t)&1)*(255-b);
					pi[0]=255;
					pi[1]=c;
					pi[2]=c;
					pi+=3;
				}
			}

			glGenTextures(1,(GLuint*)&m_texturehandle);
			glBindTexture(GL_TEXTURE_2D,m_texturehandle);
			glTexEnvf(GL_TEXTURE_ENV,GL_TEXTURE_ENV_MODE,GL_MODULATE);
			glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR_MIPMAP_LINEAR);
			// Magnification never uses mipmaps: only GL_NEAREST and GL_LINEAR are valid here,
			// a mipmap filter raises GL_INVALID_ENUM.
			glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR);
			glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT);
			glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT);
			gluBuild2DMipmaps(GL_TEXTURE_2D,3,256,256,GL_RGB,GL_UNSIGNED_BYTE,image);
			delete[] image;
			m_textureinitialized=true;
		}
		// glMatrixMode(GL_TEXTURE);
		// glLoadIdentity();
		// glMatrixMode(GL_MODELVIEW);
		glEnable(GL_TEXTURE_2D);
		glBindTexture(GL_TEXTURE_2D,m_texturehandle);
	} else
	{
		glDisable(GL_TEXTURE_2D);
	}
	glEnable(GL_COLOR_MATERIAL);
	// glEnable(GL_CULL_FACE);
	// glCullFace(GL_BACK);
}
//#pragma optimize( "g", off )
// Advance the simulation by one step.
//  - CPU mode (useCPU): writeTransforms() moves every cube and re-uploads the instance data.
//  - OpenCL mode: the GL vertex buffer is acquired for OpenCL, the interop kernel is run on it
//    (unless runOpenCLKernels is off), and the buffer is handed back to OpenGL.
// Expects cube_vbo to be bound to GL_ARRAY_BUFFER (RenderScene does this before calling).
void updatePos()
{
	if (useCPU)
	{
		// writeTransforms() already lowers each cube by 0.01 per call. The extra loop that used to
		// precede it never advanced its index and therefore only kept lowering object 0.
		writeTransforms();
		return;
	}

	// Make sure OpenGL is done with the buffer before OpenCL touches it.
	glFinish();
	cl_mem clBuffer = g_interopBuffer->getCLBUffer();
	cl_int ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (runOpenCLKernels)
	{
		int numObjects = NUM_OBJECTS;
		// Offset of the instance data inside the buffer, in units of 4 bytes.
		int offset = (sizeof(cube_vertices) )/4;

		ciErrNum = clSetKernelArg(g_interopKernel, 0, sizeof(int), &offset);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		ciErrNum = clSetKernelArg(g_interopKernel, 1, sizeof(int), &numObjects);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		ciErrNum = clSetKernelArg(g_interopKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		ciErrNum = clSetKernelArg(g_interopKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		ciErrNum = clSetKernelArg(g_interopKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);

		// Round the global size up to a whole number of work groups.
		size_t numWorkItems = workGroupSize*((NUM_OBJECTS + (workGroupSize-1)) / workGroupSize);
		ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_interopKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}

	ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	clFinish(g_cqCommandQue);
}
//#pragma optimize( "g", on )
// GLUT display callback: sets render state and camera, draws the coordinate axes, advances
// the simulation (timing that step), then draws all cubes with a single instanced draw call
// and requests the next frame.
void RenderScene(void)
{
	myinit();
	updateCamera();
	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

	//render coordinate system
	glBegin(GL_LINES);
	glColor3f(1,0,0);
	glVertex3f(0,0,0);
	glVertex3f(1,0,0);
	glColor3f(0,1,0);
	glVertex3f(0,0,0);
	glVertex3f(0,1,0);
	glColor3f(0,0,1);
	glVertex3f(0,0,0);
	glVertex3f(0,0,1);
	glEnd();

	//do a finish, to make sure timings are clean
	// glFinish();
	float start = gStopwatch.getTimeMilliseconds();
	// glBindBuffer(GL_ARRAY_BUFFER, 0);
	glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
	glFlush();
	updatePos();
	float stop = gStopwatch.getTimeMilliseconds();
	gStopwatch.reset();

	if (printStats)
	{
		printf("updatePos=%f ms on ",stop-start);
		if (useCPU)
		{
			printf("CPU \n");
		} else
		{
			printf("OpenCL ");
			if (runOpenCLKernels)
				printf("running the kernels");
			else
				printf("without running the kernels");
			printf("\n");
		}
	}

	// Byte offsets into cube_vbo. They are kept as size_t so that the conversion to a pointer
	// is well-defined on 64-bit targets (an int would be the wrong width).
	const size_t vertexStride = 9*sizeof(float);
	const size_t normaloffset = 4*sizeof(float);
	const size_t uvoffset = 7*sizeof(float);
	const size_t positionsOffset = sizeof(cube_vertices);
	const size_t orientationsOffset = sizeof(cube_vertices)+POSITION_BUFFER_SIZE;

	glBindVertexArray(cube_vao);
	glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, vertexStride, 0);
	glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)positionsOffset);
	glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, 0, (GLvoid *)orientationsOffset);
	glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, vertexStride, (GLvoid *)uvoffset);
	glVertexAttribPointer(4, 3, GL_FLOAT, GL_FALSE, vertexStride, (GLvoid *)normaloffset);

	glEnableVertexAttribArray(0);
	glEnableVertexAttribArray(1);
	glEnableVertexAttribArray(2);
	glEnableVertexAttribArray(3);
	glEnableVertexAttribArray(4);

	// Attributes 1 and 2 (instance position/orientation) advance once per instance,
	// uv and normal advance per vertex.
	glVertexAttribDivisor(1, 1);
	glVertexAttribDivisor(2, 1);
	glVertexAttribDivisor(3, 0);
	glVertexAttribDivisor(4, 0);

	glUseProgram(instancingShader);
	glUniform1f(angle_loc, 0);

	// Feed the fixed-function matrices set up by updateCamera() to the shader.
	GLfloat pm[16];
	glGetFloatv(GL_PROJECTION_MATRIX, pm);
	glUniformMatrix4fv(ProjectionMatrix, 1, false, &pm[0]);
	GLfloat mvm[16];
	glGetFloatv(GL_MODELVIEW_MATRIX, mvm);
	glUniformMatrix4fv(ModelViewMatrix, 1, false, &mvm[0]);
	glUniform1i(uniform_texture_diffuse, 0);
	glFlush();

	const int numInstances = NUM_OBJECTS;
	const int indexCount = sizeof(cube_indices)/sizeof(int);
	const size_t indexOffset = 0;
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_vbo);
	glDrawElementsInstanced(GL_TRIANGLES, indexCount, GL_UNSIGNED_INT, (void*)indexOffset, numInstances);

	glUseProgram(0);
	glBindBuffer(GL_ARRAY_BUFFER,0);
	glBindVertexArray(0);

	glutSwapBuffers();
	glutPostRedisplay();

	GLint err = glGetError();
	assert(err==GL_NO_ERROR);
}
// GLUT reshape callback: records the new window size and updates the viewport.
// With RECREATE_CL_AND_SHADERS_ON_RESIZE defined it additionally destroys and re-creates
// the OpenCL context, the shaders/buffers, the interop buffer and the kernel.
void ChangeSize(int w, int h)
{
	m_glutScreenWidth = w;
	m_glutScreenHeight = h;
#ifdef RECREATE_CL_AND_SHADERS_ON_RESIZE
	delete g_interopBuffer;
	clReleaseKernel(g_interopKernel);
	DeleteCL();
	DeleteShaders();
#endif //RECREATE_CL_AND_SHADERS_ON_RESIZE
	// Set Viewport to window dimensions
	glViewport(0, 0, w, h);
#ifdef RECREATE_CL_AND_SHADERS_ON_RESIZE
	InitCL();
	InitShaders();
	g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo);
	clFinish(g_cqCommandQue);
	// Same argument list as the call in main(): the device has to be passed as well.
	g_interopKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,interopKernelString, "interopKernel" );
#endif //RECREATE_CL_AND_SHADERS_ON_RESIZE
}
// GLUT keyboard callback. Key bindings (letters are case-insensitive):
//   ESC - set the 'done' flag
//   o   - toggle orthographic/perspective projection
//   c   - toggle CPU/OpenCL update and print the new mode
//   s   - toggle timing output
//   k   - toggle running the OpenCL kernel
//   q   - exit the program
// The mouse position (x, y) is not used.
void Keyboard(unsigned char key, int x, int y)
{
	if (key == 27)
	{
		done = true;
	}
	else if (key == 'o' || key == 'O')
	{
		m_ortho = !m_ortho;
	}
	else if (key == 'c' || key == 'C')
	{
		useCPU = !useCPU;
		printf(useCPU ? "using CPU\n" : "using OpenCL\n");
	}
	else if (key == 's' || key == 'S')
	{
		printStats = !printStats;
	}
	else if (key == 'k' || key == 'K')
	{
		runOpenCLKernels = !runOpenCLKernels;
	}
	else if (key == 'q' || key == 'Q')
	{
		exit(0);
	}
}
// Cleanup
void ShutdownRC(void)
{
glDeleteBuffers(1, &cube_vbo);
glDeleteVertexArrays(1, &cube_vao);
}
int main(int argc, char* argv[])
{
// printf("vertexShader = \n%s\n",vertexShader);
// printf("fragmentShader = \n%s\n",fragmentShader);
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
glutInitWindowSize(m_glutScreenWidth, m_glutScreenHeight);
char buf[1024];
sprintf(buf,"OpenCL - OpenGL interop, transforms %d cubes on the GPU (use c to toggle CPU/CL)", NUM_OBJECTS);
glutCreateWindow(buf);
glutReshapeFunc(ChangeSize);
glutKeyboardFunc(Keyboard);
glutDisplayFunc(RenderScene);
GLenum err = glewInit();
if (GLEW_OK != err)
{
/* Problem: glewInit failed, something is seriously wrong. */
fprintf(stderr, "Error: %s\n", glewGetErrorString(err));
}
//ChangeSize(m_glutScreenWidth,m_glutScreenHeight);
InitCL();
#define CUSTOM_CL_INITIALIZATION
#ifdef CUSTOM_CL_INITIALIZATION
g_deviceCL = new adl::DeviceCL();
g_deviceCL->m_deviceIdx = g_device;
g_deviceCL->m_context = g_cxMainContext;
g_deviceCL->m_commandQueue = g_cqCommandQue;
#else
DeviceUtils::Config cfg;
cfg.m_type = DeviceUtils::Config::DEVICE_CPU;
g_deviceCL = DeviceUtils::allocate( TYPE_CL, cfg );
#endif
int size = NUM_OBJECTS;
adl::Buffer<btVector3> linvelBuf( g_deviceCL, size );
adl::Buffer<btVector3> angvelBuf( g_deviceCL, size );
gLinVelMem = (cl_mem)linvelBuf.m_ptr;
gAngVelMem = (cl_mem)angvelBuf.m_ptr;
btVector3* linVelHost= new btVector3[size];
btVector3* angVelHost = new btVector3[size];
for (int i=0;i<NUM_OBJECTS;i++)
{
linVelHost[i].setValue(0,0,0);
angVelHost[i].setValue(1,0,0);
}
linvelBuf.write(linVelHost,NUM_OBJECTS);
angvelBuf.write(angVelHost,NUM_OBJECTS);
adl::DeviceUtils::waitForCompletion( g_deviceCL );
InitShaders();
g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo);
clFinish(g_cqCommandQue);
g_interopKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,interopKernelString, "interopKernel" );
glutMainLoop();
ShutdownRC();
return 0;
}