added OpenCL cloth demo, contributed by AMD.
updated GpuSoftBodySolvers updated DirectCompute cloth demo
This commit is contained in:
535
Demos/OpenCLClothDemo/shaders.cl
Normal file
535
Demos/OpenCLClothDemo/shaders.cl
Normal file
@@ -0,0 +1,535 @@
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
#define float3 float4
|
||||
#define uint3 uint4
|
||||
|
||||
#define PARTICLE_RADIUS 0.05;
|
||||
|
||||
#define width 1280
|
||||
#define height 1024
|
||||
|
||||
#define B 0
|
||||
#define T height
|
||||
#define L 0
|
||||
#define R width
|
||||
|
||||
#define shiftNumber 4
|
||||
#define shiftMask 0xF
|
||||
#define shiftValue 16.0f
|
||||
#define stride 4
|
||||
|
||||
#define screenWidth1 width
|
||||
#define screenHeight1 height
|
||||
#define halfScreenWidth1 screenWidth1/2
|
||||
#define halfScreenHeight1 screenHeight1/2
|
||||
#define screenWidth1SubOne (screenWidth1-1)
|
||||
#define screenHeight1SubOne (screenHeight1-1)
|
||||
#define stride screenWidth1
|
||||
#define screenPixelNumber screenWidth1*screenHeight1
|
||||
#define depthBufferSize screenPixelNumber*depthComplexity
|
||||
|
||||
#define WGS 1
|
||||
|
||||
//---------------------------------------------------------------
|
||||
|
||||
struct __VSSpriteOut
|
||||
{
|
||||
float4 position;
|
||||
float4 particlePosition;
|
||||
};
|
||||
|
||||
typedef struct __VSSpriteout VSSpriteOut;
|
||||
|
||||
struct __GSSpriteOut
|
||||
{
|
||||
float4 position;
|
||||
float2 textureUV;
|
||||
// float4 viewSpacePosition;
|
||||
// float4 particlePosition;
|
||||
};
|
||||
|
||||
typedef struct __GSSpriteout GSSpriteOut;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
__constant float4 g_positions[4] =
|
||||
{
|
||||
(float4)(-1.0f, 1.0f, 0.0f, 0.0f),
|
||||
(float4)( 1.0f, 1.0f, 0.0f, 0.0f),
|
||||
(float4)( -1.0f, -1.0f, 0.0f, 0.0f),
|
||||
(float4)( 1.0f, -1.0f, 0.0f, 0.0f)
|
||||
};
|
||||
|
||||
__constant float2 g_texcoords[4] =
|
||||
{
|
||||
(float2)(0.0f,0.0f),
|
||||
(float2)(1.0f,0.0f),
|
||||
(float2)(0.0f,1.0f),
|
||||
(float2)(1.0f,1.0f)
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
void copyMatrix(
|
||||
float matrix[16],
|
||||
__constant float matrix0[16])
|
||||
{
|
||||
uint i;
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
matrix[i] = matrix0[i];
|
||||
}
|
||||
}
|
||||
|
||||
void matrixMulLoopBody(
|
||||
uint i,
|
||||
float matrix[16],
|
||||
__constant float matrix0[16],
|
||||
__constant float matrix1[16])
|
||||
{
|
||||
matrix[i] = 0.0f;
|
||||
matrix[i] += matrix0[(i%4) + (0*4)] * matrix1[(0) + ((i/4)*4)];
|
||||
matrix[i] += matrix0[(i%4) + (1*4)] * matrix1[(1) + ((i/4)*4)];
|
||||
matrix[i] += matrix0[(i%4) + (2*4)] * matrix1[(2) + ((i/4)*4)];
|
||||
matrix[i] += matrix0[(i%4) + (3*4)] * matrix1[(3) + ((i/4)*4)];
|
||||
}
|
||||
|
||||
void matrixMul(
|
||||
float matrix[16],
|
||||
__constant float matrix0[16],
|
||||
__constant float matrix1[16])
|
||||
{
|
||||
matrixMulLoopBody(0, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(1, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(2, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(3, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(4, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(5, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(6, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(7, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(8, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(9, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(10, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(11, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(12, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(13, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(14, matrix, matrix0, matrix1);
|
||||
matrixMulLoopBody(15, matrix, matrix0, matrix1);
|
||||
}
|
||||
|
||||
float4 matrixVectorMul(float matrix[16], float4 vector)
|
||||
{
|
||||
float4 result;
|
||||
|
||||
result.x = matrix[0]*vector.x + matrix[4+0]*vector.y + matrix[8+0]*vector.z + matrix[12+0]*vector.w;
|
||||
result.y = matrix[1]*vector.x + matrix[4+1]*vector.y + matrix[8+1]*vector.z + matrix[12+1]*vector.w;
|
||||
result.z = matrix[2]*vector.x + matrix[4+2]*vector.y + matrix[8+2]*vector.z + matrix[12+2]*vector.w;
|
||||
result.w = matrix[3]*vector.x + matrix[4+3]*vector.y + matrix[8+3]*vector.z + matrix[12+3]*vector.w;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
float3 matrixVector3Mul(__constant float matrix[9], float3 vector)
|
||||
{
|
||||
float3 result;
|
||||
|
||||
result.x = matrix[0]*vector.x + matrix[3+0]*vector.y + matrix[6+0]*vector.z;
|
||||
result.y = matrix[1]*vector.x + matrix[3+1]*vector.y + matrix[6+1]*vector.z;
|
||||
result.z = matrix[2]*vector.x + matrix[3+2]*vector.y + matrix[6+2]*vector.z;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
//#define DEVICE_CPU 1
|
||||
#if defined(DEVICE_CPU)
|
||||
void printMatrix(char * name, __constant float matrix[16])
|
||||
{
|
||||
printf("%s[0] = %f, %f, %f, %f\n", name, matrix[0], matrix[1], matrix[2], matrix[3]);
|
||||
printf("%s[1] = %f, %f, %f, %f\n", name, matrix[4], matrix[5], matrix[6], matrix[7]);
|
||||
printf("%s[2] = %f, %f, %f, %f\n", name, matrix[8], matrix[9], matrix[10], matrix[11]);
|
||||
printf("%s[3] = %f, %f, %f, %f\n", name, matrix[12], matrix[13], matrix[14], matrix[15]);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
__kernel void vertexShader(
|
||||
__constant float modelview[16],
|
||||
__constant float projection[16],
|
||||
__global float4 * inputPrimitives,
|
||||
__global float4 * outputPrimitives)
|
||||
{
|
||||
float matrix[16];
|
||||
float4 gl_Vertex;
|
||||
float4 gl_Position;
|
||||
|
||||
uint id = get_global_id(0);
|
||||
|
||||
gl_Vertex = inputPrimitives[id];
|
||||
|
||||
// gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex
|
||||
matrixMul(matrix, projection, modelview);
|
||||
|
||||
gl_Position = matrixVectorMul(matrix, gl_Vertex);
|
||||
|
||||
outputPrimitives[id] = gl_Position;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
__kernel void vertexShader(
|
||||
__constant float modelview[16],
|
||||
__constant float projection[16],
|
||||
__global float4 * inputPrimitives,
|
||||
__global float4 * outputPrimitives)
|
||||
{
|
||||
uint id = get_global_id(0);
|
||||
|
||||
outputPrimitives[id] = inputPrimitives[id];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
|
||||
__kernel void
|
||||
clearImage(
|
||||
__write_only image2d_t image,
|
||||
float4 color)
|
||||
{
|
||||
|
||||
int2 coords = (int2)(get_global_id(0), get_global_id(1));
|
||||
write_imagef(image, coords, color);
|
||||
}
|
||||
|
||||
// OpenGL viewport transformation
|
||||
// The site http://research.cs.queensu.ca/~jstewart/454/notes/pipeline/
|
||||
// contains a description of this process
|
||||
void
|
||||
viewportTransform(float4 v, __constant int4 viewport[1], float2 * output)
|
||||
{
|
||||
int4 vp = viewport[0];
|
||||
*output
|
||||
= 0.5f *
|
||||
(float2)(v.x+1,v.y+1) *
|
||||
(float2)((vp.s2-vp.s0) + vp.s0,
|
||||
(vp.s3-vp.s1) + vp.s1);
|
||||
}
|
||||
|
||||
#define PARTICLE_WIDTH 32.0f
|
||||
#define PARTICLE_HEIGHT 32.0f
|
||||
|
||||
// Unoptimized triangle rasterizer function
|
||||
// Details of the algorithm can be found here:
|
||||
// http://www.devmaster.net/forums/showthread.php?t=1884
|
||||
//
|
||||
void
|
||||
rasterizerUnOpt(
|
||||
__global struct __GSSpriteOut * outputPrimitives,
|
||||
// __global float4 * outputPrimitives,
|
||||
__constant int4 viewport[1],
|
||||
__write_only image2d_t screen,
|
||||
__read_only image2d_t particle,
|
||||
uint v1Offset,
|
||||
uint v2Offset,
|
||||
uint v3Offset,
|
||||
__global float4 * debugOut1)
|
||||
{
|
||||
sampler_t sampler =
|
||||
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
|
||||
|
||||
uint id = get_global_id(0);
|
||||
|
||||
struct __GSSpriteOut output;
|
||||
float2 v1, v2, v3;
|
||||
float2 uv1, uv2, uv3;
|
||||
|
||||
output = outputPrimitives[id*4+v1Offset];
|
||||
uv1 = output.textureUV;
|
||||
viewportTransform(output.position, viewport, &v1);
|
||||
|
||||
output = outputPrimitives[id*4+v2Offset];
|
||||
uv2 = output.textureUV;
|
||||
viewportTransform(output.position, viewport, &v2);
|
||||
|
||||
output = outputPrimitives[id*4+v3Offset];
|
||||
uv3 = output.textureUV;
|
||||
viewportTransform(output.position, viewport, &v3);
|
||||
|
||||
// Bounding rectangle
|
||||
int2 min_ = convert_int2(min(v1, min(v2, v3)));
|
||||
int2 max_ = convert_int2(max(v1, max(v2, v3)));
|
||||
|
||||
// naive bi-linear interploation for texture coords, note this is
|
||||
// broken with respect to OpenGL and needs to be fixed for the
|
||||
// general case.
|
||||
float p1x = v2.x - v1.x;
|
||||
float p1y = v2.y - v1.y;
|
||||
|
||||
float p2x = v3.x - v1.x;
|
||||
float p2y = v3.y - v1.y;
|
||||
|
||||
// Scan through bounding rectangle
|
||||
for(int y = min_.y; y < max_.y; y++) {
|
||||
for(int x = min_.x; x < max_.x; x++) {
|
||||
// When all half-space functions positive, pixel is in triangle
|
||||
if((v1.x - v2.x) * (y - v1.y) - (v1.y - v2.y) * (x - v1.x) > 0 &&
|
||||
(v2.x - v3.x) * (y - v2.y) - (v2.y - v3.y) * (x - v2.x) > 0 &&
|
||||
(v3.x - v1.x) * (y - v3.y) - (v3.y - v1.y) * (x - v3.x) > 0) {
|
||||
|
||||
float px = x - v1.x;
|
||||
float py = y - v1.y;
|
||||
|
||||
write_imagef(
|
||||
screen,
|
||||
(int2)(x,y),
|
||||
// texel);
|
||||
(float4)(1.0f,1.0f,1.0f,1.0f));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Optimized rasterizer function
|
||||
// Details of the algorithm can be found here:
|
||||
// http://www.devmaster.net/forums/showthread.php?t=1884
|
||||
//
|
||||
// Currently has a bug, still work in progess
|
||||
__kernel void
|
||||
rasterizerXX(
|
||||
__global float4 * outputPrimitives,
|
||||
__write_only image2d_t screen,
|
||||
__global float4 * debugOut1,
|
||||
__global int2 * debugOut2)
|
||||
{
|
||||
uint id = get_global_id(0);
|
||||
|
||||
// printf("ras\n");
|
||||
|
||||
float4 v1 = outputPrimitives[id*4+0];
|
||||
float4 v2 = outputPrimitives[id*4+1];
|
||||
float4 v3 = outputPrimitives[id*4+2];
|
||||
|
||||
float y1 = 0.5f* (v1.y+1) * (T - B) + B;
|
||||
float y2 = 0.5f* (v2.y+1) * (T - B) + B;
|
||||
float y3 = 0.5f* (v3.y+1) * (T - B) + B;
|
||||
|
||||
float x1 = 0.5f * (v1.x+1) * (R - L) + L;
|
||||
float x2 = 0.5f * (v2.x+1) * (R - L) + L;
|
||||
float x3 = 0.5f * (v3.x+1) * (R - L) + L;
|
||||
|
||||
const int Y1 = convert_int(shiftValue * y1);
|
||||
const int Y2 = convert_int(shiftValue * y2);
|
||||
const int Y3 = convert_int(shiftValue * y3);
|
||||
|
||||
const int X1 = convert_int(shiftValue * x1);
|
||||
const int X2 = convert_int(shiftValue * x2);
|
||||
const int X3 = convert_int(shiftValue * x3);
|
||||
|
||||
debugOut1[id*4+0] = v1;
|
||||
debugOut1[id*4+1] = v2;
|
||||
debugOut1[id*4+2] = v3;
|
||||
|
||||
debugOut2[id*3+0] = (int2)(X1, Y1);
|
||||
debugOut2[id*3+1] = (int2)(X2, Y2);
|
||||
debugOut2[id*3+2] = (int2)(X3, Y3);
|
||||
|
||||
// Deltas
|
||||
const int DX12 = X1 - X2;
|
||||
const int DX23 = X2 - X3;
|
||||
const int DX31 = X3 - X1;
|
||||
|
||||
const int DY12 = Y1 - Y2;
|
||||
const int DY23 = Y2 - Y3;
|
||||
const int DY31 = Y3 - Y1;
|
||||
|
||||
// Fixed-point deltas
|
||||
const int FDX12 = DX12 << shiftNumber;
|
||||
const int FDX23 = DX23 << shiftNumber;
|
||||
const int FDX31 = DX31 << shiftNumber;
|
||||
|
||||
const int FDY12 = DY12 << shiftNumber;
|
||||
const int FDY23 = DY23 << shiftNumber;
|
||||
const int FDY31 = DY31 << shiftNumber;
|
||||
|
||||
// Bounding rectangle
|
||||
int minx = (min(X1, min(X2, X3)) + shiftMask) >> shiftNumber;
|
||||
//minx = max(0,minx);
|
||||
|
||||
int maxx = (max(X1, min(X2, X3)) + shiftMask) >> shiftNumber;
|
||||
//min(maxx , screenWidth1SubOne);
|
||||
|
||||
int miny = (min(Y1, min(Y2, Y3)) + shiftMask) >> shiftNumber;
|
||||
//max(0,miny);
|
||||
|
||||
int maxy = (max(Y1, min(Y2, Y3)) + shiftMask) >> shiftNumber;
|
||||
//min(maxy , screenHeight1SubOne);
|
||||
|
||||
//(char*&)colorBuffer += miny * stride;
|
||||
int offset = miny * stride;
|
||||
|
||||
// Half-edge constants
|
||||
int C1 = DY12 * X1 - DX12 * Y1;
|
||||
int C2 = DY23 * X2 - DX23 * Y2;
|
||||
int C3 = DY31 * X3 - DX31 * Y3;
|
||||
|
||||
// Correct for fill convention
|
||||
if(DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++;
|
||||
if(DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++;
|
||||
if(DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
|
||||
|
||||
int CY1 = C1 + DX12 * (miny << shiftNumber) - DY12 * (minx << shiftNumber);
|
||||
int CY2 = C2 + DX23 * (miny << shiftNumber) - DY23 * (minx << shiftNumber);
|
||||
int CY3 = C3 + DX31 * (miny << shiftNumber) - DY31 * (minx << shiftNumber);
|
||||
|
||||
for(int y = miny; y < maxy; y++) {
|
||||
int CX1 = CY1;
|
||||
int CX2 = CY2;
|
||||
int CX3 = CY3;
|
||||
|
||||
debugOut2[id*3+0] = (int2)(minx, maxx);
|
||||
|
||||
for(int x = minx; x < maxx; x++) {
|
||||
debugOut2[id*3+0] = (int2)(CX1, CX2);
|
||||
|
||||
if(CX1 > 0 && CX2 > 0 && CX3 > 0) {
|
||||
debugOut2[id*3+0] = (int2)(1, 1);
|
||||
write_imagef(
|
||||
screen,
|
||||
(int2)(x,y),
|
||||
(float4)(1.0f,1.0f,1.0f,1.0f));
|
||||
}
|
||||
|
||||
CX1 -= FDY12;
|
||||
CX2 -= FDY23;
|
||||
CX3 -= FDY31;
|
||||
}
|
||||
|
||||
CY1 += FDX12;
|
||||
CY2 += FDX23;
|
||||
CY3 += FDX31;
|
||||
|
||||
//(char*&)colorBuffer += stride;
|
||||
offset += stride;
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
void geometryShader(
|
||||
__constant float modelview[16],
|
||||
__constant float projection[16],
|
||||
__constant float inverseView[9],
|
||||
__constant int4 viewport[1],
|
||||
__local struct __VSSpriteOut * vsOutputPrimitives,
|
||||
__global struct __GSSpriteOut * outputPrimitives,
|
||||
// __global float4 * outputPrimitives,
|
||||
__write_only image2d_t screen,
|
||||
__read_only image2d_t particle,
|
||||
__global float4 * debugOut1,
|
||||
__global int * debugOut2)
|
||||
{
|
||||
float2 texcoords[4] =
|
||||
{
|
||||
(float2)(0.0f,0.0f),
|
||||
(float2)(1.0f,0.0f),
|
||||
(float2)(0.0f,1.0f),
|
||||
(float2)(1.0f,1.0f)
|
||||
};
|
||||
|
||||
float matrix[16];
|
||||
|
||||
uint id = get_global_id(0);
|
||||
uint lid = get_local_id(0);
|
||||
|
||||
float4 vsPosition = vsOutputPrimitives[lid].position;
|
||||
|
||||
matrixMul(matrix, projection, modelview);
|
||||
//
|
||||
// Emit two new triangles
|
||||
//
|
||||
for (uint i = 0; i<4; i++) {
|
||||
float3 position = g_positions[i] * PARTICLE_RADIUS;
|
||||
position = matrixVector3Mul(inverseView, position) + vsPosition;
|
||||
float3 particlePosition =
|
||||
matrixVector3Mul(
|
||||
inverseView,
|
||||
(float4)(0.0f,0.0f,0.0f,0.0f)) + vsPosition; // world space
|
||||
|
||||
// Compute view space position
|
||||
position.w = 1.0f;
|
||||
position = matrixVectorMul(matrix, position);
|
||||
|
||||
//perspective division
|
||||
position /= position.w;
|
||||
|
||||
struct __GSSpriteOut output;
|
||||
output.position = position;
|
||||
//output.textureUV = g_texcoords[i];
|
||||
output.textureUV = texcoords[i];
|
||||
outputPrimitives[id*4+i] = output;
|
||||
}
|
||||
|
||||
// Render QUAD - Triangle 1
|
||||
rasterizerUnOpt(
|
||||
outputPrimitives,
|
||||
viewport,
|
||||
screen,
|
||||
particle,
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
debugOut1);
|
||||
|
||||
// Render QUAD - Triangle 2
|
||||
rasterizerUnOpt(
|
||||
outputPrimitives,
|
||||
viewport,
|
||||
screen,
|
||||
particle,
|
||||
2,
|
||||
1,
|
||||
3,
|
||||
debugOut1);
|
||||
}
|
||||
|
||||
__kernel void vertexShaderSprite(
|
||||
__constant float modelview[16],
|
||||
__constant float projection[16],
|
||||
__constant float inverseView[9],
|
||||
__constant int4 viewport[1],
|
||||
__local struct __VSSpriteOut * vsOutputPrimitives,
|
||||
__global float4 * inputPrimitives,
|
||||
__global struct __GSSpriteOut * outputPrimitives,
|
||||
// __global float4 * outputPrimitives,
|
||||
__write_only image2d_t screen,
|
||||
__read_only image2d_t particle,
|
||||
__global float4 * debugOut1,
|
||||
__global int * debugOut2)
|
||||
{
|
||||
float matrix[16];
|
||||
|
||||
uint id = get_global_id(0);
|
||||
uint lid = get_local_id(0);
|
||||
|
||||
// gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex
|
||||
matrixMul(matrix, projection, modelview);
|
||||
|
||||
float4 position = inputPrimitives[id];
|
||||
vsOutputPrimitives[lid].position = position;
|
||||
vsOutputPrimitives[lid].particlePosition =
|
||||
matrixVectorMul(matrix, position);
|
||||
|
||||
geometryShader(
|
||||
modelview,
|
||||
projection,
|
||||
inverseView,
|
||||
viewport,
|
||||
vsOutputPrimitives,
|
||||
outputPrimitives,
|
||||
screen,
|
||||
particle,
|
||||
debugOut1,
|
||||
debugOut2);
|
||||
}
|
||||
Reference in New Issue
Block a user