Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80
This commit is contained in:
230
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlAabb.h
Normal file
230
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlAabb.h
Normal file
@@ -0,0 +1,230 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
#ifndef AABB_H
|
||||
#define AABB_H
|
||||
|
||||
#include "Stubs/AdlMath.h"
|
||||
#include "Stubs/AdlQuaternion.h"
|
||||
|
||||
// Shape type identifiers. Numbering starts at 2 and increases by one per entry.
enum AdlCollisionShapeTypes
{
	ADL_SHAPE_SPHERE = 2,
	ADL_SHAPE_HEIGHT_FIELD = 3,
	SHAPE_CONVEX_HEIGHT_FIELD = 4
};
|
||||
|
||||
// Axis-aligned bounding box described by two float4 corners (m_max and m_min).
// The member functions are declared inline here and defined below the struct.
_MEM_CLASSALIGN16
struct Aabb
{
	_MEM_ALIGNED_ALLOCATOR16;

	// ---- building and growing the box ----
	__inline void setEmpty();
	__inline void includeVolume( const Aabb& aabb );
	__inline void includePoint( const float4& p );
	__inline void expandBy( const float4& r );

	// ---- queries ----
	__inline bool overlaps( const float4& p ) const;
	__inline bool overlaps( const Aabb& aabb ) const;
	__inline static bool overlaps( const Aabb& a, const Aabb& b );
	__inline bool intersect(const float4* from, const float4* to, const float4* invRay) const;
	__inline float4 center() const;
	__inline float4 getExtent() const;
	__inline int getMajorAxis() const;

	// ---- re-fitting the box after a rigid transform ----
	__inline void transform(const float4& translation, const Quaternion& quat);
	__inline void transform(const float4& translation, const Matrix3x3& rot);

	// Corner with the largest coordinates, then corner with the smallest ones.
	float4 m_max;
	float4 m_min;
};
|
||||
|
||||
void Aabb::setEmpty()
|
||||
{
|
||||
m_max = make_float4( -FLT_MAX );
|
||||
m_min = make_float4( FLT_MAX );
|
||||
}
|
||||
|
||||
void Aabb::includeVolume(const Aabb& aabb)
|
||||
{
|
||||
m_max.x = max2( m_max.x, aabb.m_max.x );
|
||||
m_min.x = min2( m_min.x, aabb.m_min.x );
|
||||
|
||||
m_max.y = max2( m_max.y, aabb.m_max.y );
|
||||
m_min.y = min2( m_min.y, aabb.m_min.y );
|
||||
|
||||
m_max.z = max2( m_max.z, aabb.m_max.z );
|
||||
m_min.z = min2( m_min.z, aabb.m_min.z );
|
||||
}
|
||||
|
||||
void Aabb::includePoint( const float4& p )
|
||||
{
|
||||
m_max.x = max2( m_max.x, p.x );
|
||||
m_min.x = min2( m_min.x, p.x );
|
||||
|
||||
m_max.y = max2( m_max.y, p.y );
|
||||
m_min.y = min2( m_min.y, p.y );
|
||||
|
||||
m_max.z = max2( m_max.z, p.z );
|
||||
m_min.z = min2( m_min.z, p.z );
|
||||
}
|
||||
|
||||
bool Aabb::overlaps( const float4& p ) const
|
||||
{
|
||||
float4 dx = m_max-p;
|
||||
float4 dm = p-m_min;
|
||||
|
||||
return (dx.x >= 0 && dx.y >= 0 && dx.z >= 0)
|
||||
&& (dm.x >= 0 && dm.y >= 0 && dm.z >= 0);
|
||||
}
|
||||
|
||||
bool Aabb::overlaps( const Aabb& in ) const
|
||||
{
|
||||
/*
|
||||
if( m_max.x < in.m_min.x || m_min.x > in.m_max.x ) return false;
|
||||
if( m_max.y < in.m_min.y || m_min.y > in.m_max.y ) return false;
|
||||
if( m_max.z < in.m_min.z || m_min.z > in.m_max.z ) return false;
|
||||
|
||||
return true;
|
||||
*/
|
||||
return overlaps( *this, in );
|
||||
}
|
||||
|
||||
bool Aabb::overlaps( const Aabb& a, const Aabb& b )
|
||||
{
|
||||
if( a.m_max.x < b.m_min.x || a.m_min.x > b.m_max.x ) return false;
|
||||
if( a.m_max.y < b.m_min.y || a.m_min.y > b.m_max.y ) return false;
|
||||
if( a.m_max.z < b.m_min.z || a.m_min.z > b.m_max.z ) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Midpoint of the box: half of (m_max + m_min).
float4 Aabb::center() const
{
	const float4 cornerSum = m_max+m_min;
	return 0.5f*cornerSum;
}
|
||||
|
||||
int Aabb::getMajorAxis() const
|
||||
{
|
||||
float4 extent = getExtent();
|
||||
|
||||
int majorAxis = 0;
|
||||
if( extent.s[1] > extent.s[0] )
|
||||
majorAxis = 1;
|
||||
if( extent.s[2] > extent.s[majorAxis] )
|
||||
majorAxis = 2;
|
||||
|
||||
return majorAxis;
|
||||
}
|
||||
|
||||
// Full size of the box: m_max - m_min.
float4 Aabb::getExtent() const
{
	const float4 diagonal = m_max-m_min;
	return diagonal;
}
|
||||
|
||||
void Aabb::expandBy( const float4& r )
|
||||
{
|
||||
m_max += r;
|
||||
m_min -= r;
|
||||
}
|
||||
|
||||
bool Aabb::intersect(const float4* from, const float4* to, const float4* invRay) const
|
||||
{
|
||||
float4 dFar;
|
||||
dFar = (m_max - *from);
|
||||
dFar *= *invRay;
|
||||
float4 dNear;
|
||||
dNear = (m_min - *from);
|
||||
dNear *= *invRay;
|
||||
|
||||
float4 tFar;
|
||||
tFar = max2(dFar, dNear);
|
||||
float4 tNear;
|
||||
tNear = min2(dFar, dNear);
|
||||
|
||||
float farf[] = { tFar.x, tFar.y, tFar.z };
|
||||
|
||||
float nearf[] = { tNear.x, tNear.y, tNear.z };
|
||||
|
||||
float minFar = min2(farf[0], min2(farf[1], farf[2]));
|
||||
float maxNear = max2(nearf[0], max2(nearf[1], nearf[2]));
|
||||
|
||||
minFar = min2(1.0f, minFar );
|
||||
maxNear = max2(0.0f, maxNear);
|
||||
|
||||
return (minFar >= maxNear);
|
||||
}
|
||||
|
||||
void Aabb::transform(const float4& translation, const Matrix3x3& m)
|
||||
{
|
||||
float4 c = center();
|
||||
|
||||
Aabb& ans = *this;
|
||||
|
||||
float4 e[] = { m.m_row[0]*m_min, m.m_row[1]*m_min, m.m_row[2]*m_min };
|
||||
float4 f[] = { m.m_row[0]*m_max, m.m_row[1]*m_max, m.m_row[2]*m_max };
|
||||
ans.m_max = ans.m_min = translation;
|
||||
|
||||
{ int j=0;
|
||||
float4 mi = make_float4( min2( e[j].x, f[j].x ), min2( e[j].y, f[j].y ), min2( e[j].z, f[j].z ) );
|
||||
float4 ma = make_float4( max2( e[j].x, f[j].x ), max2( e[j].y, f[j].y ), max2( e[j].z, f[j].z ) );
|
||||
|
||||
ans.m_min.x += mi.x+mi.y+mi.z;
|
||||
ans.m_max.x += ma.x+ma.y+ma.z;
|
||||
}
|
||||
|
||||
{ int j=1;
|
||||
float4 mi = make_float4( min2( e[j].x, f[j].x ), min2( e[j].y, f[j].y ), min2( e[j].z, f[j].z ) );
|
||||
float4 ma = make_float4( max2( e[j].x, f[j].x ), max2( e[j].y, f[j].y ), max2( e[j].z, f[j].z ) );
|
||||
|
||||
ans.m_min.y += mi.x+mi.y+mi.z;
|
||||
ans.m_max.y += ma.x+ma.y+ma.z;
|
||||
}
|
||||
|
||||
{ int j=2;
|
||||
float4 mi = make_float4( min2( e[j].x, f[j].x ), min2( e[j].y, f[j].y ), min2( e[j].z, f[j].z ) );
|
||||
float4 ma = make_float4( max2( e[j].x, f[j].x ), max2( e[j].y, f[j].y ), max2( e[j].z, f[j].z ) );
|
||||
|
||||
ans.m_min.z += mi.x+mi.y+mi.z;
|
||||
ans.m_max.z += ma.x+ma.y+ma.z;
|
||||
}
|
||||
}
|
||||
|
||||
void Aabb::transform(const float4& translation, const Quaternion& quat)
|
||||
{
|
||||
Matrix3x3 m = qtGetRotationMatrix( quat );
|
||||
|
||||
transform( translation, m );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
212
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlArray.h
Normal file
212
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlArray.h
Normal file
@@ -0,0 +1,212 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef ARRAY_H
|
||||
#define ARRAY_H
|
||||
|
||||
#include <string.h>
|
||||
#include <malloc.h>
|
||||
#include <Common/Base/Error.h>
|
||||
#include <new.h>
|
||||
|
||||
|
||||
// Growable array backed by 16-byte aligned storage (_aligned_malloc).
// Storage is moved with memcpy when it grows, so T has to tolerate being
// relocated bytewise.
template <class T>
class Array
{
	public:
		// Empty array with DEFAULT_SIZE slots reserved.
		__inline
		Array();
		// Array holding `size` default-constructed elements.
		__inline
		Array(int size);
		__inline
		~Array();

		// Element access; the index is checked with CLASSERT only.
		__inline
		T& operator[] (int idx);
		__inline
		const T& operator[] (int idx) const;

		// Appends a copy of elem, growing the storage by INCREASE_SIZE when full.
		__inline
		void pushBack(const T& elem);
		// Drops the last element (capacity is kept).
		__inline
		void popBack();
		// Sets the element count to zero (capacity is kept).
		__inline
		void clear();
		// Sets the element count, reallocating when size exceeds the capacity.
		__inline
		void setSize(int size);
		__inline
		int getSize() const;

		// Pointer to the first element of the storage.
		__inline
		T* begin();
		__inline
		const T* begin() const;

		// Index of the first element equal to data, or -1.
		__inline
		int indexOf(const T& data) const;
		// Removes element idx by overwriting it with the last element (order is not kept).
		__inline
		void removeAt(int idx);
		// Grows the array by one element and returns a reference to it.
		__inline
		T& expandOne();

	private:
		// Copying is not supported: the buffer is owned by exactly one Array.
		// The assignment operator is declared but intentionally not defined, so
		// that `a = b` no longer compiles into a shallow copy that would free
		// the same buffer twice.
		Array(const Array& a){}
		Array& operator=(const Array& a);

	private:
		enum
		{
			DEFAULT_SIZE = 128,		// capacity reserved by the default constructor
			INCREASE_SIZE = 128,	// slots added each time pushBack() has to grow
		};

		T* m_data;			// aligned storage, m_capacity slots
		int m_size;			// number of elements in use
		int m_capacity;		// number of allocated slots
};
|
||||
|
||||
template<class T>
|
||||
Array<T>::Array()
|
||||
{
|
||||
m_size = 0;
|
||||
m_capacity = DEFAULT_SIZE;
|
||||
// m_data = new T[ m_capacity ];
|
||||
m_data = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16);
|
||||
for(int i=0; i<m_capacity; i++) new(&m_data[i])T;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Array<T>::Array(int size)
|
||||
{
|
||||
m_size = size;
|
||||
m_capacity = size;
|
||||
// m_data = new T[ m_capacity ];
|
||||
m_data = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16);
|
||||
for(int i=0; i<m_capacity; i++) new(&m_data[i])T;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Array<T>::~Array()
|
||||
{
|
||||
if( m_data )
|
||||
{
|
||||
// delete [] m_data;
|
||||
_aligned_free( m_data );
|
||||
m_data = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
T& Array<T>::operator[](int idx)
|
||||
{
|
||||
CLASSERT(idx<m_size);
|
||||
return m_data[idx];
|
||||
}
|
||||
|
||||
template<class T>
|
||||
const T& Array<T>::operator[](int idx) const
|
||||
{
|
||||
CLASSERT(idx<m_size);
|
||||
return m_data[idx];
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void Array<T>::pushBack(const T& elem)
|
||||
{
|
||||
if( m_size == m_capacity )
|
||||
{
|
||||
int oldCap = m_capacity;
|
||||
m_capacity += INCREASE_SIZE;
|
||||
// T* s = new T[m_capacity];
|
||||
T* s = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16);
|
||||
memcpy( s, m_data, sizeof(T)*oldCap );
|
||||
// delete [] m_data;
|
||||
_aligned_free( m_data );
|
||||
m_data = s;
|
||||
}
|
||||
m_data[ m_size++ ] = elem;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void Array<T>::popBack()
|
||||
{
|
||||
CLASSERT( m_size>0 );
|
||||
m_size--;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void Array<T>::clear()
|
||||
{
|
||||
m_size = 0;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void Array<T>::setSize(int size)
|
||||
{
|
||||
if( size > m_capacity )
|
||||
{
|
||||
int oldCap = m_capacity;
|
||||
m_capacity = size;
|
||||
// T* s = new T[m_capacity];
|
||||
T* s = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16);
|
||||
for(int i=0; i<m_capacity; i++) new(&s[i])T;
|
||||
memcpy( s, m_data, sizeof(T)*oldCap );
|
||||
// delete [] m_data;
|
||||
_aligned_free( m_data );
|
||||
m_data = s;
|
||||
}
|
||||
m_size = size;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
int Array<T>::getSize() const
|
||||
{
|
||||
return m_size;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
const T* Array<T>::begin() const
|
||||
{
|
||||
return m_data;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
T* Array<T>::begin()
|
||||
{
|
||||
return m_data;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
int Array<T>::indexOf(const T& data) const
|
||||
{
|
||||
for(int i=0; i<m_size; i++)
|
||||
{
|
||||
if( data == m_data[i] ) return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void Array<T>::removeAt(int idx)
|
||||
{
|
||||
CLASSERT(idx<m_size);
|
||||
m_data[idx] = m_data[--m_size];
|
||||
}
|
||||
|
||||
template<class T>
|
||||
T& Array<T>::expandOne()
|
||||
{
|
||||
setSize( m_size+1 );
|
||||
return m_data[ m_size-1 ];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef COLLIDE_UTILS_H
|
||||
#define COLLIDE_UTILS_H
|
||||
|
||||
#include "Stubs/AdlMath.h"
|
||||
|
||||
|
||||
class CollideUtils
|
||||
{
|
||||
public:
|
||||
template<bool FLIPSIGN>
|
||||
static bool collide(const float4& a, const float4& b, const float4& c, const float4& p, float4& normalOut, float margin = 0.f);
|
||||
|
||||
__inline
|
||||
static float castRay(const float4& v0, const float4& v1, const float4& v2,
|
||||
const float4& rayFrom, const float4& rayTo, float margin = 0.0f, float4* bCrdOut = NULL);
|
||||
|
||||
};
|
||||
|
||||
|
||||
template<bool FLIPSIGN>
|
||||
bool CollideUtils::collide(const float4& a, const float4& b, const float4& c, const float4& p, float4& normalOut, float margin)
|
||||
{
|
||||
float4 ab, bc, ca;
|
||||
ab = b-a;
|
||||
bc = c-b;
|
||||
ca = a-c;
|
||||
|
||||
float4 ap, bp, cp;
|
||||
ap = p-a;
|
||||
bp = p-b;
|
||||
cp = p-c;
|
||||
|
||||
float4 n;
|
||||
n = cross3(ab, -1.f*ca);
|
||||
|
||||
float4 abp = cross3( ab, ap );
|
||||
float4 bcp = cross3( bc, bp );
|
||||
float4 cap = cross3( ca, cp );
|
||||
|
||||
float s0 = dot3F4(n,abp);
|
||||
float s1 = dot3F4(n,bcp);
|
||||
float s2 = dot3F4(n,cap);
|
||||
|
||||
// if(( s0<0.f && s1<0.f && s2<0.f ) || ( s0>0.f && s1>0.f && s2>0.f ))
|
||||
if(( s0<margin && s1<margin && s2<margin ) || ( s0>-margin && s1>-margin && s2>-margin ))
|
||||
{
|
||||
n = normalize3( n );
|
||||
n.w = dot3F4(n,ap);
|
||||
|
||||
normalOut = (FLIPSIGN)? -n : n;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
__inline
|
||||
float CollideUtils::castRay(const float4& v0, const float4& v1, const float4& v2,
|
||||
const float4& rayFrom, const float4& rayTo, float margin, float4* bCrdOut)
|
||||
{
|
||||
float t, v, w;
|
||||
float4 ab; ab = v1 - v0;
|
||||
float4 ac; ac = v2 - v0;
|
||||
float4 qp; qp = rayFrom - rayTo;
|
||||
float4 normal = cross3( ab, ac );
|
||||
float d = dot3F4( qp, normal );
|
||||
float odd = 1.f/d;
|
||||
float4 ap; ap = rayFrom - v0;
|
||||
t = dot3F4( ap, normal );
|
||||
t *= odd;
|
||||
// if( t < 0.f || t > 1.f ) return -1;
|
||||
|
||||
float4 e = cross3( qp, ap );
|
||||
v = dot3F4( ac, e );
|
||||
v *= odd;
|
||||
if( v < -margin || v > 1.f+margin ) return -1;
|
||||
w = -dot3F4( ab, e );
|
||||
w *= odd;
|
||||
// if( w < 0.f || w > 1.f ) return -1;
|
||||
if( w < -margin || w > 1.f+margin ) return -1;
|
||||
|
||||
float u = 1.f-v-w;
|
||||
if( u < -margin || u > 1.f+margin ) return -1;
|
||||
|
||||
if( bCrdOut )
|
||||
{
|
||||
bCrdOut->x = u;
|
||||
bCrdOut->y = v;
|
||||
bCrdOut->z = w;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef COLLISION_SHAPE_H
|
||||
#define COLLISION_SHAPE_H
|
||||
|
||||
#include "Stubs/AdlMath.h"
|
||||
#include "Stubs/AdlAabb.h"
|
||||
|
||||
|
||||
// Abstract base class for collision shapes: stores the shape type, a bounding
// box and a collision margin, and declares the distance queries that concrete
// shapes must implement.
_MEM_CLASSALIGN16
class CollisionShape
{
	public:
		_MEM_ALIGNED_ALLOCATOR16;

		enum Type
		{
			SHAPE_HEIGHT_FIELD = 0,
			SHAPE_CONVEX_HEIGHT_FIELD = 1,
			SHAPE_PLANE = 2,
			MAX_NUM_SHAPE_TYPES = 3		// number of shape types above
		};

		// m_aabb is left default-initialized by this constructor.
		CollisionShape( Type type, float collisionMargin = 0.0025f )
			: m_type( type ), m_collisionMargin( collisionMargin )
		{
		}
		virtual ~CollisionShape(){}

		// Distance query for point p; the meaning of the value is defined by the subclass.
		virtual float queryDistance(const float4& p) const = 0;
		// Distance query for point p that also reports a normal through normalOut.
		virtual bool queryDistanceWithNormal(const float4& p, float4& normalOut) const = 0;

	public:
		Type m_type;
		Aabb m_aabb;
		float m_collisionMargin;
};
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef ADL_CONSTRAINT4_H
|
||||
#define ADL_CONSTRAINT4_H
|
||||
|
||||
|
||||
|
||||
struct Constraint4
|
||||
{
|
||||
_MEM_ALIGNED_ALLOCATOR16;
|
||||
|
||||
float4 m_linear;
|
||||
float4 m_worldPos[4];
|
||||
float4 m_center; // friction
|
||||
float m_jacCoeffInv[4];
|
||||
float m_b[4];
|
||||
float m_appliedRambdaDt[4];
|
||||
|
||||
float m_fJacCoeffInv[2]; // friction
|
||||
float m_fAppliedRambdaDt[2]; // friction
|
||||
|
||||
u32 m_bodyA;
|
||||
u32 m_bodyB;
|
||||
|
||||
u32 m_batchIdx;
|
||||
u32 m_paddings[1];
|
||||
|
||||
__inline
|
||||
void setFrictionCoeff(float value) { m_linear.w = value; }
|
||||
__inline
|
||||
float getFrictionCoeff() const { return m_linear.w; }
|
||||
};
|
||||
|
||||
#endif //ADL_CONSTRAINT4_H
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef ADL_CONTACT4_H
|
||||
#define ADL_CONTACT4_H
|
||||
|
||||
#if defined(CL_PLATFORM_AMD)
#include "AdlConstraint4.h"
#include "Adl/Adl.h"

// With CL_PLATFORM_AMD defined, solver data is a pointer to an Adl buffer of Constraint4.
typedef adl::Buffer<Constraint4>* SolverData;
#else
// Otherwise solver data is an opaque pointer.
typedef void* SolverData;
#endif

// Opaque pointer type for shape data.
typedef void* ShapeDataType;
|
||||
|
||||
|
||||
struct Contact4
|
||||
{
|
||||
_MEM_ALIGNED_ALLOCATOR16;
|
||||
|
||||
float4 m_worldPos[4];
|
||||
float4 m_worldNormal;
|
||||
// float m_restituitionCoeff;
|
||||
// float m_frictionCoeff;
|
||||
u16 m_restituitionCoeffCmp;
|
||||
u16 m_frictionCoeffCmp;
|
||||
int m_batchIdx;
|
||||
|
||||
u32 m_bodyAPtr;
|
||||
u32 m_bodyBPtr;
|
||||
|
||||
// todo. make it safer
|
||||
int& getBatchIdx() { return m_batchIdx; }
|
||||
float getRestituitionCoeff() const { return ((float)m_restituitionCoeffCmp/(float)0xffff); }
|
||||
void setRestituitionCoeff( float c ) { ADLASSERT( c >= 0.f && c <= 1.f ); m_restituitionCoeffCmp = (u16)(c*0xffff); }
|
||||
float getFrictionCoeff() const { return ((float)m_frictionCoeffCmp/(float)0xffff); }
|
||||
void setFrictionCoeff( float c ) { ADLASSERT( c >= 0.f && c <= 1.f ); m_frictionCoeffCmp = (u16)(c*0xffff); }
|
||||
|
||||
float& getNPoints() { return m_worldNormal.w; }
|
||||
float getNPoints() const { return m_worldNormal.w; }
|
||||
|
||||
float getPenetration(int idx) const { return m_worldPos[idx].w; }
|
||||
|
||||
bool isInvalid() const { return ((u32)m_bodyAPtr+(u32)m_bodyBPtr) == 0; }
|
||||
};
|
||||
|
||||
struct ContactPoint4
|
||||
{
|
||||
float4 m_worldPos[4];
|
||||
union
|
||||
{
|
||||
float4 m_worldNormal;
|
||||
|
||||
struct Data
|
||||
{
|
||||
int m_padding[3];
|
||||
float m_nPoints; // for cl
|
||||
}m_data;
|
||||
|
||||
};
|
||||
float m_restituitionCoeff;
|
||||
float m_frictionCoeff;
|
||||
// int m_nPoints;
|
||||
// int m_padding0;
|
||||
|
||||
void* m_bodyAPtr;
|
||||
void* m_bodyBPtr;
|
||||
// int m_padding1;
|
||||
// int m_padding2;
|
||||
|
||||
float& getNPoints() { return m_data.m_nPoints; }
|
||||
float getNPoints() const { return m_data.m_nPoints; }
|
||||
|
||||
float getPenetration(int idx) const { return m_worldPos[idx].w; }
|
||||
|
||||
// __inline
|
||||
// void load(int idx, const ContactPoint& src);
|
||||
// __inline
|
||||
// void store(int idx, ContactPoint& dst) const;
|
||||
|
||||
bool isInvalid() const { return ((u32)m_bodyAPtr+(u32)m_bodyBPtr) == 0; }
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif //ADL_CONTACT4_H
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef CL_ERROR_H
|
||||
#define CL_ERROR_H
|
||||
|
||||
#ifdef DX11RENDER
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// Assertion macros.
//  - Debug builds (_DEBUG): break into the debugger when the condition is false.
//  - Other builds: the condition is still evaluated (side effects are kept),
//    but its result is ignored.
// Both forms are wrapped in do { } while(0) so that an assertion is a single
// statement and can be used safely in an if/else without braces. Invoke them
// with a trailing semicolon: CLASSERT( cond );
#ifdef _DEBUG
#include <assert.h>
#define CLASSERT(x) do { if(!(x)){__debugbreak(); } } while(0)
#define ADLASSERT(x) do { if(!(x)){__debugbreak(); } } while(0)
#else
#define CLASSERT(x) do { if(x){} } while(0)
#define ADLASSERT(x) do { if(x){} } while(0)

#endif
|
||||
|
||||
|
||||
|
||||
|
||||
// Compile-time check, active in debug builds only; use inside a function body.
// A false condition produces an array typedef of negative size, which every
// compiler rejects. (A zero-sized array is accepted as an extension by some
// compilers, which would let a failing check pass silently.)
#ifdef _DEBUG
#define COMPILE_TIME_ASSERT(x) {typedef char compileTimeAssertFailed[(x) ? 1 : -1];}
#else
#define COMPILE_TIME_ASSERT(x)
#endif
|
||||
|
||||
#ifdef _DEBUG
#include <stdarg.h>
#include <stdio.h>
#endif

// printf-style debug output.
//  - Without _DEBUG the function does nothing.
//  - With _DEBUG and DX11RENDER the message is formatted into a 256-character
//    buffer and sent to OutputDebugString (converted to wide characters first
//    when UNICODE is defined).
//  - With _DEBUG only, the message goes to stdout through vprintf.
__inline
void debugPrintf(const char *fmt, ...)
{
#ifdef _DEBUG
	va_list args;
	va_start(args, fmt);
#ifdef DX11RENDER
	char text[256];
	vsprintf_s( text, 256, fmt, args );
#ifdef UNICODE
	WCHAR wideText[256];
	// The first call only queries the required length, the second one converts.
	const int wideLength = MultiByteToWideChar(0,0,text,-1,wideText,0);
	MultiByteToWideChar(0,0,text,-1,wideText,wideLength);

	OutputDebugString( wideText );
#else
	OutputDebugString( text );
#endif
#else
	vprintf(fmt, args);
#endif
	va_end(args);
#endif
}
|
||||
|
||||
|
||||
// Prints "WARNING: <msg>" followed by a newline through debugPrintf; msg is passed to a %s conversion.
// NOTE(review): the expansion already ends in ';', so `WARN(x);` yields an extra empty statement and cannot sit directly before an `else`.
#define WARN(msg) debugPrintf("WARNING: %s\n", msg);
|
||||
|
||||
#endif
|
||||
|
||||
216
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlMath.h
Normal file
216
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/AdlMath.h
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef CL_MATH_H
|
||||
#define CL_MATH_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <xmmintrin.h>
|
||||
|
||||
|
||||
#include "AdlError.h"
|
||||
#include <algorithm>
|
||||
// Sorting entry point used by this code base.
#define pxSort std::sort

// Pi as a single-precision constant.
#define PI 3.14159265358979323846f

// Rounds num up to the next multiple of alignment (num is returned unchanged
// when it already is a multiple).
#define NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment))

// Prefix for a struct/class declaration that requests 16-byte alignment.
#define _MEM_CLASSALIGN16 __declspec(align(16))

// Class-scope allocation operators that take storage from
// _aligned_malloc(size, 16) and return it with _aligned_free, plus a
// placement new/delete pair that hands back the given address unchanged.
#define _MEM_ALIGNED_ALLOCATOR16 \
	void* operator new(size_t size) { return _aligned_malloc( size, 16 ); } \
	void operator delete(void *p) { _aligned_free( p ); } \
	void* operator new[](size_t size) { return _aligned_malloc( size, 16 ); } \
	void operator delete[](void *p) { _aligned_free( p ); } \
	void* operator new(size_t size, void* p) { return p; } \
	void operator delete(void *p, void* pp) {}
|
||||
|
||||
|
||||
|
||||
// Rounds n up to the next power of two; a power of two is returned unchanged.
// Intended for integer types T. For n == 0 the result is 0, because the
// initial decrement wraps to all ones and the final increment wraps back.
template<class T>
T nextPowerOf2(T n)
{
	n -= 1;
	// Smear the highest set bit into every lower position. Doubling the shift
	// each round needs only log2(bits) steps, and the unsigned counter avoids
	// a signed/unsigned comparison against sizeof.
	for(unsigned int shift=1; shift<sizeof(T)*8; shift<<=1)
		n = n | (n>>shift);
	return n+1;
}
|
||||
|
||||
|
||||
// Four packed floats with 16-byte alignment. The same storage can be accessed
// as named lanes (x, y, z, w), as an array (s[0..3]) or as an SSE register (m_quad).
_MEM_CLASSALIGN16
struct float4
{
	_MEM_ALIGNED_ALLOCATOR16;

	union
	{
		struct { float x, y, z, w; };	// named lanes
		float s[4];						// indexed lanes
		__m128 m_quad;					// SSE view
	};
};
|
||||
|
||||
// Returns 1 when all four lanes of a compare equal to 0.f, otherwise 0.
__forceinline
unsigned int isZero(const float4& a)
{
	const bool allLanesZero = (a.x == 0.f) && (a.y == 0.f) && (a.z == 0.f) && (a.w == 0.f);
	return allLanesZero ? 1u : 0u;
}
|
||||
|
||||
// Four packed ints with 16-byte alignment, accessible as named lanes
// (x, y, z, w) or as an array (s[0..3]).
_MEM_CLASSALIGN16
struct int4
{
	_MEM_ALIGNED_ALLOCATOR16;

	union
	{
		struct { int x, y, z, w; };		// named lanes
		int s[4];						// indexed lanes
	};
};
|
||||
|
||||
// Two packed ints, accessible as named lanes (x, y) or as an array (s[0..1]).
struct int2
{
	union
	{
		struct { int x, y; };	// named lanes
		int s[2];				// indexed lanes
	};
};
|
||||
|
||||
// Two packed floats, accessible as named lanes (x, y) or as an array (s[0..1]).
struct float2
{
	union
	{
		struct { float x, y; };		// named lanes
		float s[2];					// indexed lanes
	};
};
|
||||
|
||||
|
||||
// Short names for the unsigned integer types used throughout these headers.
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
|
||||
|
||||
|
||||
|
||||
#include "Adlfloat4.inl"
|
||||
//#include <Common/Math/float4SSE.inl>
|
||||
|
||||
|
||||
|
||||
|
||||
// Exchanges the values of a and b through a temporary copy of a.
template<typename T>
void swap2(T& a, T& b)
{
	T previousA = a;
	a = b;
	b = previousA;
}
|
||||
|
||||
|
||||
// Seeds the C runtime random number generator (srand) that randRange() draws from.
__inline
void randSeed(int seed)
{
	srand( (unsigned int)seed );
}
|
||||
|
||||
// Returns a pseudo-random value in [minV, maxV), drawn with rand().
// The random fraction takes one of 10000 evenly spaced values in [0, 1).
template<typename T>
__inline
T randRange(const T& minV, const T& maxV)
{
	const float fraction = (rand()%10000)/10000.f;
	const T span = maxV - minV;
	return (T)(minV + fraction*span);
}
|
||||
|
||||
template<>
|
||||
__inline
|
||||
float4 randRange(const float4& minV, const float4& maxV)
|
||||
{
|
||||
float4 r = make_float4( (rand()%10000)/10000.f, (rand()%10000)/10000.f, (rand()%10000)/10000.f, (rand()%10000)/10000.f );
|
||||
float4 range = maxV - minV;
|
||||
return (minV + r*range);
|
||||
}
|
||||
|
||||
|
||||
struct SortData
|
||||
{
|
||||
union
|
||||
{
|
||||
u32 m_key;
|
||||
struct { u16 m_key16[2]; };
|
||||
};
|
||||
u32 m_value;
|
||||
|
||||
friend bool operator <(const SortData& a, const SortData& b)
|
||||
{
|
||||
return a.m_key < b.m_key;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
T* addByteOffset(void* baseAddr, u32 offset)
|
||||
{
|
||||
return (T*)(((u32)baseAddr)+offset);
|
||||
}
|
||||
|
||||
|
||||
struct Pair32
|
||||
{
|
||||
Pair32(){}
|
||||
Pair32(u32 a, u32 b) : m_a(a), m_b(b){}
|
||||
|
||||
u32 m_a;
|
||||
u32 m_b;
|
||||
};
|
||||
|
||||
// Pair of untyped pointers.
struct PtrPair
{
	void* m_a;
	void* m_b;

	// Leaves both members uninitialized.
	PtrPair() {}

	PtrPair(void* a, void* b)
	{
		m_a = a;
		m_b = b;
	}

	// Accepts any two pointers of one type; the C-style cast also strips qualifiers.
	template<typename T>
	PtrPair(T* a, T* b)
	{
		m_a = (void*)a;
		m_b = (void*)b;
	}
};
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,194 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef MATRIX3X3_H
|
||||
#define MATRIX3X3_H
|
||||
|
||||
#include "AdlMath.h"
|
||||
|
||||
///////////////////////////////////////
|
||||
// Matrix3x3
|
||||
///////////////////////////////////////
|
||||
|
||||
typedef
|
||||
_MEM_CLASSALIGN16 struct
|
||||
{
|
||||
_MEM_ALIGNED_ALLOCATOR16;
|
||||
float4 m_row[3];
|
||||
}Matrix3x3;
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtZero();
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtIdentity();
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtDiagonal(float a, float b, float c);
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtTranspose(const Matrix3x3& m);
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtMul(const Matrix3x3& a, const Matrix3x3& b);
|
||||
|
||||
__inline
|
||||
float4 mtMul1(const Matrix3x3& a, const float4& b);
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtMul2(float a, const Matrix3x3& b);
|
||||
|
||||
__inline
|
||||
float4 mtMul3(const float4& b, const Matrix3x3& a);
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtInvert(const Matrix3x3& m);
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtZero()
|
||||
{
|
||||
Matrix3x3 m;
|
||||
m.m_row[0] = make_float4(0.f);
|
||||
m.m_row[1] = make_float4(0.f);
|
||||
m.m_row[2] = make_float4(0.f);
|
||||
return m;
|
||||
}
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtIdentity()
|
||||
{
|
||||
Matrix3x3 m;
|
||||
m.m_row[0] = make_float4(1,0,0);
|
||||
m.m_row[1] = make_float4(0,1,0);
|
||||
m.m_row[2] = make_float4(0,0,1);
|
||||
return m;
|
||||
}
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtDiagonal(float a, float b, float c)
|
||||
{
|
||||
Matrix3x3 m;
|
||||
m.m_row[0] = make_float4(a,0,0);
|
||||
m.m_row[1] = make_float4(0,b,0);
|
||||
m.m_row[2] = make_float4(0,0,c);
|
||||
return m;
|
||||
}
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtTranspose(const Matrix3x3& m)
|
||||
{
|
||||
Matrix3x3 out;
|
||||
out.m_row[0] = make_float4(m.m_row[0].s[0], m.m_row[1].s[0], m.m_row[2].s[0], 0.f);
|
||||
out.m_row[1] = make_float4(m.m_row[0].s[1], m.m_row[1].s[1], m.m_row[2].s[1], 0.f);
|
||||
out.m_row[2] = make_float4(m.m_row[0].s[2], m.m_row[1].s[2], m.m_row[2].s[2], 0.f);
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtMul(const Matrix3x3& a, const Matrix3x3& b)
|
||||
{
|
||||
Matrix3x3 transB;
|
||||
transB = mtTranspose( b );
|
||||
Matrix3x3 ans;
|
||||
for(int i=0; i<3; i++)
|
||||
{
|
||||
ans.m_row[i].s[0] = dot3F4(a.m_row[i],transB.m_row[0]);
|
||||
ans.m_row[i].s[1] = dot3F4(a.m_row[i],transB.m_row[1]);
|
||||
ans.m_row[i].s[2] = dot3F4(a.m_row[i],transB.m_row[2]);
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 mtMul1(const Matrix3x3& a, const float4& b)
|
||||
{
|
||||
float4 ans;
|
||||
ans.s[0] = dot3F4( a.m_row[0], b );
|
||||
ans.s[1] = dot3F4( a.m_row[1], b );
|
||||
ans.s[2] = dot3F4( a.m_row[2], b );
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtMul2(float a, const Matrix3x3& b)
|
||||
{
|
||||
Matrix3x3 ans;
|
||||
ans.m_row[0] = a*b.m_row[0];
|
||||
ans.m_row[1] = a*b.m_row[1];
|
||||
ans.m_row[2] = a*b.m_row[2];
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 mtMul3(const float4& a, const Matrix3x3& b)
|
||||
{
|
||||
float4 ans;
|
||||
ans.x = a.x*b.m_row[0].x + a.y*b.m_row[1].x + a.z*b.m_row[2].x;
|
||||
ans.y = a.x*b.m_row[0].y + a.y*b.m_row[1].y + a.z*b.m_row[2].y;
|
||||
ans.z = a.x*b.m_row[0].z + a.y*b.m_row[1].z + a.z*b.m_row[2].z;
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtInvert(const Matrix3x3& m)
|
||||
{
|
||||
float det = m.m_row[0].s[0]*m.m_row[1].s[1]*m.m_row[2].s[2]+m.m_row[1].s[0]*m.m_row[2].s[1]*m.m_row[0].s[2]+m.m_row[2].s[0]*m.m_row[0].s[1]*m.m_row[1].s[2]
|
||||
-m.m_row[0].s[0]*m.m_row[2].s[1]*m.m_row[1].s[2]-m.m_row[2].s[0]*m.m_row[1].s[1]*m.m_row[0].s[2]-m.m_row[1].s[0]*m.m_row[0].s[1]*m.m_row[2].s[2];
|
||||
|
||||
CLASSERT( det );
|
||||
|
||||
Matrix3x3 ans;
|
||||
ans.m_row[0].s[0] = m.m_row[1].s[1]*m.m_row[2].s[2] - m.m_row[1].s[2]*m.m_row[2].s[1];
|
||||
ans.m_row[0].s[1] = m.m_row[0].s[2]*m.m_row[2].s[1] - m.m_row[0].s[1]*m.m_row[2].s[2];
|
||||
ans.m_row[0].s[2] = m.m_row[0].s[1]*m.m_row[1].s[2] - m.m_row[0].s[2]*m.m_row[1].s[1];
|
||||
ans.m_row[0].w = 0.f;
|
||||
|
||||
ans.m_row[1].s[0] = m.m_row[1].s[2]*m.m_row[2].s[0] - m.m_row[1].s[0]*m.m_row[2].s[2];
|
||||
ans.m_row[1].s[1] = m.m_row[0].s[0]*m.m_row[2].s[2] - m.m_row[0].s[2]*m.m_row[2].s[0];
|
||||
ans.m_row[1].s[2] = m.m_row[0].s[2]*m.m_row[1].s[0] - m.m_row[0].s[0]*m.m_row[1].s[2];
|
||||
ans.m_row[1].w = 0.f;
|
||||
|
||||
ans.m_row[2].s[0] = m.m_row[1].s[0]*m.m_row[2].s[1] - m.m_row[1].s[1]*m.m_row[2].s[0];
|
||||
ans.m_row[2].s[1] = m.m_row[0].s[1]*m.m_row[2].s[0] - m.m_row[0].s[0]*m.m_row[2].s[1];
|
||||
ans.m_row[2].s[2] = m.m_row[0].s[0]*m.m_row[1].s[1] - m.m_row[0].s[1]*m.m_row[1].s[0];
|
||||
ans.m_row[2].w = 0.f;
|
||||
|
||||
ans = mtMul2((1.0f/det), ans);
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
Matrix3x3 mtSet( const float4& a, const float4& b, const float4& c )
|
||||
{
|
||||
Matrix3x3 m;
|
||||
m.m_row[0] = a;
|
||||
m.m_row[1] = b;
|
||||
m.m_row[2] = c;
|
||||
return m;
|
||||
}
|
||||
|
||||
__inline
|
||||
Matrix3x3 operator+(const Matrix3x3& a, const Matrix3x3& b)
|
||||
{
|
||||
Matrix3x3 out;
|
||||
out.m_row[0] = a.m_row[0] + b.m_row[0];
|
||||
out.m_row[1] = a.m_row[1] + b.m_row[1];
|
||||
out.m_row[2] = a.m_row[2] + b.m_row[2];
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef QUATERNION_H
|
||||
#define QUATERNION_H
|
||||
|
||||
#include "AdlMatrix3x3.h"
|
||||
|
||||
|
||||
typedef float4 Quaternion;
|
||||
|
||||
__inline
|
||||
Quaternion qtSet(const float4& axis, float angle);
|
||||
|
||||
__inline
|
||||
Quaternion qtMul(const Quaternion& a, const Quaternion& b);
|
||||
|
||||
__inline
|
||||
float4 qtRotate(const Quaternion& q, const float4& vec);
|
||||
|
||||
__inline
|
||||
float4 qtInvRotate(const Quaternion& q, const float4& vec);
|
||||
|
||||
__inline
|
||||
Quaternion qtInvert(const Quaternion& q);
|
||||
|
||||
__inline
|
||||
Matrix3x3 qtGetRotationMatrix(const Quaternion& quat);
|
||||
|
||||
__inline
|
||||
Quaternion qtNormalize(const Quaternion& q);
|
||||
|
||||
__inline
|
||||
Quaternion qtGetIdentity() { return make_float4(0,0,0,1); }
|
||||
|
||||
__inline
|
||||
Quaternion qtSet(const float4& axis, float angle)
|
||||
{
|
||||
float4 nAxis = normalize3( axis );
|
||||
|
||||
Quaternion q;
|
||||
q.s[0] = nAxis.s[0]*sin(angle/2);
|
||||
q.s[1] = nAxis.s[1]*sin(angle/2);
|
||||
q.s[2] = nAxis.s[2]*sin(angle/2);
|
||||
q.s[3] = cos(angle/2);
|
||||
return q;
|
||||
}
|
||||
|
||||
__inline
|
||||
Quaternion qtMul(const Quaternion& a, const Quaternion& b)
|
||||
{
|
||||
Quaternion ans;
|
||||
ans = cross3( a, b );
|
||||
ans += a.s[3]*b + b.s[3]*a;
|
||||
ans.s[3] = a.s[3]*b.s[3] - (a.s[0]*b.s[0]+a.s[1]*b.s[1]+a.s[2]*b.s[2]);
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 qtRotate(const Quaternion& q, const float4& vec)
|
||||
{
|
||||
Quaternion vecQ = vec;
|
||||
vecQ.s[3] = 0.f;
|
||||
Quaternion qInv = qtInvert( q );
|
||||
float4 out = qtMul(qtMul(q,vecQ),qInv);
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 qtInvRotate(const Quaternion& q, const float4& vec)
|
||||
{
|
||||
return qtRotate( qtInvert( q ), vec );
|
||||
}
|
||||
|
||||
__inline
|
||||
Quaternion qtInvert(const Quaternion& q)
|
||||
{
|
||||
Quaternion ans;
|
||||
ans.s[0] = -q.s[0];
|
||||
ans.s[1] = -q.s[1];
|
||||
ans.s[2] = -q.s[2];
|
||||
ans.s[3] = q.s[3];
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
Matrix3x3 qtGetRotationMatrix(const Quaternion& quat)
|
||||
{
|
||||
float4 quat2 = make_float4(quat.s[0]*quat.s[0], quat.s[1]*quat.s[1], quat.s[2]*quat.s[2], 0.f);
|
||||
Matrix3x3 out;
|
||||
|
||||
out.m_row[0].s[0]=1-2*quat2.s[1]-2*quat2.s[2];
|
||||
out.m_row[0].s[1]=2*quat.s[0]*quat.s[1]-2*quat.s[3]*quat.s[2];
|
||||
out.m_row[0].s[2]=2*quat.s[0]*quat.s[2]+2*quat.s[3]*quat.s[1];
|
||||
out.m_row[0].s[3] = 0.f;
|
||||
|
||||
out.m_row[1].s[0]=2*quat.s[0]*quat.s[1]+2*quat.s[3]*quat.s[2];
|
||||
out.m_row[1].s[1]=1-2*quat2.s[0]-2*quat2.s[2];
|
||||
out.m_row[1].s[2]=2*quat.s[1]*quat.s[2]-2*quat.s[3]*quat.s[0];
|
||||
out.m_row[1].s[3] = 0.f;
|
||||
|
||||
out.m_row[2].s[0]=2*quat.s[0]*quat.s[2]-2*quat.s[3]*quat.s[1];
|
||||
out.m_row[2].s[1]=2*quat.s[1]*quat.s[2]+2*quat.s[3]*quat.s[0];
|
||||
out.m_row[2].s[2]=1-2*quat2.s[0]-2*quat2.s[1];
|
||||
out.m_row[2].s[3] = 0.f;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
Quaternion qtGetQuaternion(const Matrix3x3* m)
|
||||
{
|
||||
Quaternion q;
|
||||
q.w = sqrtf( m[0].m_row[0].x + m[0].m_row[1].y + m[0].m_row[2].z + 1 ) * 0.5f;
|
||||
float inv4w = 1.f/(4.f*q.w);
|
||||
q.x = (m[0].m_row[2].y-m[0].m_row[1].z)*inv4w;
|
||||
q.y = (m[0].m_row[0].z-m[0].m_row[2].x)*inv4w;
|
||||
q.z = (m[0].m_row[1].x-m[0].m_row[0].y)*inv4w;
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
__inline
|
||||
Quaternion qtNormalize(const Quaternion& q)
|
||||
{
|
||||
return normalize4(q);
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 transform(const float4& p, const float4& translation, const Quaternion& orientation)
|
||||
{
|
||||
return qtRotate( orientation, p ) + translation;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 invTransform(const float4& p, const float4& translation, const Quaternion& orientation)
|
||||
{
|
||||
return qtRotate( qtInvert( orientation ), p-translation ); // use qtInvRotate
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef ADL_RIGID_BODY_H
|
||||
#define ADL_RIGID_BODY_H
|
||||
|
||||
#include "AdlQuaternion.h"
|
||||
|
||||
class RigidBodyBase
|
||||
{
|
||||
public:
|
||||
|
||||
_MEM_CLASSALIGN16
|
||||
struct Body
|
||||
{
|
||||
_MEM_ALIGNED_ALLOCATOR16;
|
||||
|
||||
float4 m_pos;
|
||||
Quaternion m_quat;
|
||||
float4 m_linVel;
|
||||
float4 m_angVel;
|
||||
|
||||
u32 m_shapeIdx;
|
||||
u32 m_shapeType;
|
||||
|
||||
float m_invMass;
|
||||
float m_restituitionCoeff;
|
||||
float m_frictionCoeff;
|
||||
|
||||
};
|
||||
|
||||
struct Inertia
|
||||
{
|
||||
/* u16 m_shapeType;
|
||||
u16 m_shapeIdx;
|
||||
float m_restituitionCoeff;
|
||||
float m_frictionCoeff;
|
||||
int m_padding;
|
||||
*/
|
||||
Matrix3x3 m_invInertia;
|
||||
Matrix3x3 m_initInvInertia;
|
||||
};
|
||||
};
|
||||
|
||||
#endif// ADL_RIGID_BODY_H
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef _ADL_TRANSFORM_H
|
||||
#define _ADL_TRANSFORM_H
|
||||
|
||||
#include "AdlMath.h"
|
||||
#include "AdlQuaternion.h"
|
||||
#include "AdlMatrix3x3.h"
|
||||
|
||||
struct Transform
|
||||
{
|
||||
float4 m_translation;
|
||||
Matrix3x3 m_rotation;
|
||||
};
|
||||
|
||||
// Builds a Transform from a translation and an orientation quaternion.
// Marked __inline like the other helpers: a non-inline definition in a
// header produces duplicate symbols once the header is included from more
// than one translation unit.
__inline
Transform trSetTransform(const float4& translation, const Quaternion& quat)
{
	Transform tr;
	tr.m_translation = translation;
	tr.m_rotation = qtGetRotationMatrix( quat );
	return tr;
}
|
||||
|
||||
// Inverse of a rigid transform: the rotation is transposed and the
// translation becomes R^T * (-t).
// Marked __inline like the other helpers: a non-inline definition in a
// header produces duplicate symbols once the header is included from more
// than one translation unit.
__inline
Transform trInvert( const Transform& tr )
{
	Transform ans;
	ans.m_rotation = mtTranspose( tr.m_rotation );
	ans.m_translation = mtMul1( ans.m_rotation, -tr.m_translation );
	return ans;
}
|
||||
|
||||
// Composition trA * trB: applying the result to a point is the same as
// applying trB first and trA second.
// Marked __inline like the other helpers: a non-inline definition in a
// header produces duplicate symbols once the header is included from more
// than one translation unit.
__inline
Transform trMul(const Transform& trA, const Transform& trB)
{
	Transform ans;
	ans.m_rotation = mtMul( trA.m_rotation, trB.m_rotation );
	ans.m_translation = mtMul1( trA.m_rotation, trB.m_translation ) + trA.m_translation;
	return ans;
}
|
||||
|
||||
// Applies tr to the point p: rotate, then translate.
// Marked __inline like the other helpers: a non-inline definition in a
// header produces duplicate symbols once the header is included from more
// than one translation unit.
__inline
float4 trMul1(const Transform& tr, const float4& p)
{
	return mtMul1( tr.m_rotation, p ) + tr.m_translation;
}
|
||||
|
||||
|
||||
#endif //_ADL_TRANSFORM_H
|
||||
|
||||
@@ -0,0 +1,373 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
//#define CHECK_ALIGNMENT(a) CLASSERT((u32(&(a)) & 0xf) == 0);
|
||||
// Alignment check hook used by the float4 operators. In this configuration
// it only evaluates its argument as an expression statement (no check).
#define CHECK_ALIGNMENT(a) a;
|
||||
|
||||
|
||||
__inline
|
||||
float4 make_float4(float x, float y, float z, float w = 0.f)
|
||||
{
|
||||
float4 v;
|
||||
v.x = x; v.y = y; v.z = z; v.w = w;
|
||||
return v;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 make_float4(float x)
|
||||
{
|
||||
return make_float4(x,x,x,x);
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 make_float4(const int4& x)
|
||||
{
|
||||
return make_float4((float)x.s[0], (float)x.s[1], (float)x.s[2], (float)x.s[3]);
|
||||
}
|
||||
|
||||
__inline
|
||||
float2 make_float2(float x, float y)
|
||||
{
|
||||
float2 v;
|
||||
v.s[0] = x; v.s[1] = y;
|
||||
return v;
|
||||
}
|
||||
|
||||
__inline
|
||||
float2 make_float2(float x)
|
||||
{
|
||||
return make_float2(x,x);
|
||||
}
|
||||
|
||||
__inline
|
||||
float2 make_float2(const int2& x)
|
||||
{
|
||||
return make_float2((float)x.s[0], (float)x.s[1]);
|
||||
}
|
||||
|
||||
__inline
|
||||
int4 make_int4(int x, int y, int z, int w = 0)
|
||||
{
|
||||
int4 v;
|
||||
v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w;
|
||||
return v;
|
||||
}
|
||||
|
||||
__inline
|
||||
int4 make_int4(int x)
|
||||
{
|
||||
return make_int4(x,x,x,x);
|
||||
}
|
||||
|
||||
__inline
|
||||
int4 make_int4(const float4& x)
|
||||
{
|
||||
return make_int4((int)x.x, (int)x.y, (int)x.z, (int)x.w);
|
||||
}
|
||||
|
||||
__inline
|
||||
int2 make_int2(int a, int b)
|
||||
{
|
||||
int2 ans; ans.x = a; ans.y = b;
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator-(const float4& a)
|
||||
{
|
||||
return make_float4(-a.x, -a.y, -a.z, -a.w);
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator*(const float4& a, const float4& b)
|
||||
{
|
||||
CLASSERT((u32(&a) & 0xf) == 0);
|
||||
|
||||
float4 out;
|
||||
out.s[0] = a.s[0]*b.s[0];
|
||||
out.s[1] = a.s[1]*b.s[1];
|
||||
out.s[2] = a.s[2]*b.s[2];
|
||||
out.s[3] = a.s[3]*b.s[3];
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator*(float a, const float4& b)
|
||||
{
|
||||
return make_float4(a*b.s[0], a*b.s[1], a*b.s[2], a*b.s[3]);
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator*(const float4& b, float a)
|
||||
{
|
||||
CHECK_ALIGNMENT(b);
|
||||
|
||||
return make_float4(a*b.s[0], a*b.s[1], a*b.s[2], a*b.s[3]);
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator*=(float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
a.s[0]*=b.s[0];
|
||||
a.s[1]*=b.s[1];
|
||||
a.s[2]*=b.s[2];
|
||||
a.s[3]*=b.s[3];
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator*=(float4& a, float b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
a.s[0]*=b;
|
||||
a.s[1]*=b;
|
||||
a.s[2]*=b;
|
||||
a.s[3]*=b;
|
||||
}
|
||||
|
||||
//
|
||||
__inline
|
||||
float4 operator/(const float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 out;
|
||||
out.s[0] = a.s[0]/b.s[0];
|
||||
out.s[1] = a.s[1]/b.s[1];
|
||||
out.s[2] = a.s[2]/b.s[2];
|
||||
out.s[3] = a.s[3]/b.s[3];
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator/(const float4& b, float a)
|
||||
{
|
||||
CHECK_ALIGNMENT(b);
|
||||
|
||||
return make_float4(b.s[0]/a, b.s[1]/a, b.s[2]/a, b.s[3]/a);
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator/=(float4& a, const float4& b)
|
||||
{
|
||||
a.s[0]/=b.s[0];
|
||||
a.s[1]/=b.s[1];
|
||||
a.s[2]/=b.s[2];
|
||||
a.s[3]/=b.s[3];
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator/=(float4& a, float b)
|
||||
{
|
||||
CLASSERT((u32(&a) & 0xf) == 0);
|
||||
|
||||
a.s[0]/=b;
|
||||
a.s[1]/=b;
|
||||
a.s[2]/=b;
|
||||
a.s[3]/=b;
|
||||
}
|
||||
//
|
||||
|
||||
__inline
|
||||
float4 operator+(const float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 out;
|
||||
out.s[0] = a.s[0]+b.s[0];
|
||||
out.s[1] = a.s[1]+b.s[1];
|
||||
out.s[2] = a.s[2]+b.s[2];
|
||||
out.s[3] = a.s[3]+b.s[3];
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator+(const float4& a, float b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 out;
|
||||
out.s[0] = a.s[0]+b;
|
||||
out.s[1] = a.s[1]+b;
|
||||
out.s[2] = a.s[2]+b;
|
||||
out.s[3] = a.s[3]+b;
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator-(const float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 out;
|
||||
out.s[0] = a.s[0]-b.s[0];
|
||||
out.s[1] = a.s[1]-b.s[1];
|
||||
out.s[2] = a.s[2]-b.s[2];
|
||||
out.s[3] = a.s[3]-b.s[3];
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator-(const float4& a, float b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 out;
|
||||
out.s[0] = a.s[0]-b;
|
||||
out.s[1] = a.s[1]-b;
|
||||
out.s[2] = a.s[2]-b;
|
||||
out.s[3] = a.s[3]-b;
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator+=(float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
a.s[0]+=b.s[0];
|
||||
a.s[1]+=b.s[1];
|
||||
a.s[2]+=b.s[2];
|
||||
a.s[3]+=b.s[3];
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator+=(float4& a, float b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
a.s[0]+=b;
|
||||
a.s[1]+=b;
|
||||
a.s[2]+=b;
|
||||
a.s[3]+=b;
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator-=(float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
a.s[0]-=b.s[0];
|
||||
a.s[1]-=b.s[1];
|
||||
a.s[2]-=b.s[2];
|
||||
a.s[3]-=b.s[3];
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator-=(float4& a, float b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
a.s[0]-=b;
|
||||
a.s[1]-=b;
|
||||
a.s[2]-=b;
|
||||
a.s[3]-=b;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
__inline
|
||||
float4 cross3(const float4& a, const float4& b)
|
||||
{
|
||||
return make_float4(a.s[1]*b.s[2]-a.s[2]*b.s[1],
|
||||
a.s[2]*b.s[0]-a.s[0]*b.s[2],
|
||||
a.s[0]*b.s[1]-a.s[1]*b.s[0],
|
||||
0);
|
||||
}
|
||||
|
||||
__inline
|
||||
float dot3F4(const float4& a, const float4& b)
|
||||
{
|
||||
return a.x*b.x+a.y*b.y+a.z*b.z;
|
||||
}
|
||||
|
||||
__inline
|
||||
float length3(const float4& a)
|
||||
{
|
||||
return sqrtf(dot3F4(a,a));
|
||||
}
|
||||
|
||||
__inline
|
||||
float dot4(const float4& a, const float4& b)
|
||||
{
|
||||
return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
|
||||
}
|
||||
|
||||
// for height
|
||||
__inline
|
||||
float dot3w1(const float4& point, const float4& eqn)
|
||||
{
|
||||
return point.x*eqn.x+point.y*eqn.y+point.z*eqn.z+eqn.w;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 normalize3(const float4& a)
|
||||
{
|
||||
float length = sqrtf(dot3F4(a, a));
|
||||
return 1.f/length * a;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 normalize4(const float4& a)
|
||||
{
|
||||
float length = sqrtf(dot4(a, a));
|
||||
return 1.f/length * a;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 createEquation(const float4& a, const float4& b, const float4& c)
|
||||
{
|
||||
float4 eqn;
|
||||
float4 ab = b-a;
|
||||
float4 ac = c-a;
|
||||
eqn = normalize3( cross3(ab, ac) );
|
||||
eqn.w = -dot3F4(eqn,a);
|
||||
return eqn;
|
||||
}
|
||||
|
||||
|
||||
// Returns the larger of a and b; returns b when neither is greater.
template<typename T>
__inline
T max2(const T& a, const T& b)
{
	if( a>b )
	{
		return a;
	}
	return b;
}
|
||||
|
||||
// Returns the smaller of a and b; returns b when neither is smaller.
template<typename T>
__inline
T min2(const T& a, const T& b)
{
	if( a<b )
	{
		return a;
	}
	return b;
}
|
||||
|
||||
template<>
|
||||
__inline
|
||||
float4 max2(const float4& a, const float4& b)
|
||||
{
|
||||
return make_float4( max2(a.x,b.x), max2(a.y,b.y), max2(a.z,b.z), max2(a.w,b.w) );
|
||||
}
|
||||
|
||||
template<>
|
||||
__inline
|
||||
float4 min2(const float4& a, const float4& b)
|
||||
{
|
||||
return make_float4( min2(a.x,b.x), min2(a.y,b.y), min2(a.z,b.z), min2(a.w,b.w) );
|
||||
}
|
||||
|
||||
@@ -0,0 +1,381 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
//#define CHECK_ALIGNMENT(a) CLASSERT((u32(&(a)) & 0xf) == 0);
|
||||
// Alignment check hook used by the float4 operators. In this configuration
// it only evaluates its argument as an expression statement (no check).
#define CHECK_ALIGNMENT(a) a;
|
||||
|
||||
|
||||
__inline
|
||||
float4 make_float4(float x, float y, float z, float w = 0.f)
|
||||
{
|
||||
float4 v;
|
||||
v.m_quad = _mm_set_ps(w,z,y,x);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 make_float4(float x)
|
||||
{
|
||||
return make_float4(x,x,x,x);
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 make_float4(const int4& x)
|
||||
{
|
||||
return make_float4((float)x.s[0], (float)x.s[1], (float)x.s[2], (float)x.s[3]);
|
||||
}
|
||||
|
||||
__inline
|
||||
float2 make_float2(float x, float y)
|
||||
{
|
||||
float2 v;
|
||||
v.s[0] = x; v.s[1] = y;
|
||||
return v;
|
||||
}
|
||||
|
||||
__inline
|
||||
float2 make_float2(float x)
|
||||
{
|
||||
return make_float2(x,x);
|
||||
}
|
||||
|
||||
__inline
|
||||
float2 make_float2(const int2& x)
|
||||
{
|
||||
return make_float2((float)x.s[0], (float)x.s[1]);
|
||||
}
|
||||
|
||||
__inline
|
||||
int4 make_int4(int x, int y, int z, int w = 0)
|
||||
{
|
||||
int4 v;
|
||||
v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w;
|
||||
return v;
|
||||
}
|
||||
|
||||
__inline
|
||||
int4 make_int4(int x)
|
||||
{
|
||||
return make_int4(x,x,x,x);
|
||||
}
|
||||
|
||||
__inline
|
||||
int4 make_int4(const float4& x)
|
||||
{
|
||||
return make_int4((int)x.x, (int)x.y, (int)x.z, (int)x.w);
|
||||
}
|
||||
|
||||
__inline
|
||||
int2 make_int2(int a, int b)
|
||||
{
|
||||
int2 ans; ans.x = a; ans.y = b;
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator-(const float4& a)
|
||||
{
|
||||
float4 zero; zero.m_quad = _mm_setzero_ps();
|
||||
float4 ans; ans.m_quad = _mm_sub_ps( zero.m_quad, a.m_quad );
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator*(const float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 out;
|
||||
out.m_quad = _mm_mul_ps( a.m_quad, b.m_quad );
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator*(float a, const float4& b)
|
||||
{
|
||||
float4 av; av.m_quad = _mm_set1_ps( a );
|
||||
return av*b;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator*(const float4& b, float a)
|
||||
{
|
||||
CHECK_ALIGNMENT(b);
|
||||
|
||||
float4 av; av.m_quad = _mm_set1_ps( a );
|
||||
return av*b;
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator*=(float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
a = a*b;
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator*=(float4& a, float b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 bv; bv.m_quad = _mm_set1_ps( b );
|
||||
a = a*bv;
|
||||
}
|
||||
|
||||
//
|
||||
__inline
|
||||
float4 operator/(const float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 out;
|
||||
out.m_quad = _mm_div_ps( a.m_quad, b.m_quad );
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator/(const float4& b, float a)
|
||||
{
|
||||
CHECK_ALIGNMENT(b);
|
||||
|
||||
float4 av; av.m_quad = _mm_set1_ps( a );
|
||||
float4 out;
|
||||
out = b/av;
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator/=(float4& a, const float4& b)
|
||||
{
|
||||
a = a/b;
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator/=(float4& a, float b)
|
||||
{
|
||||
CLASSERT((u32(&a) & 0xf) == 0);
|
||||
|
||||
float4 bv; bv.m_quad = _mm_set1_ps( b );
|
||||
a = a/bv;
|
||||
}
|
||||
//
|
||||
|
||||
__inline
|
||||
float4 operator+(const float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 out;
|
||||
out.m_quad = _mm_add_ps( a.m_quad, b.m_quad );
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator+(const float4& a, float b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 bv; bv.m_quad = _mm_set1_ps( b );
|
||||
return a+bv;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator-(const float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 out;
|
||||
out.m_quad = _mm_sub_ps( a.m_quad, b.m_quad );
|
||||
return out;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 operator-(const float4& a, float b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 bv; bv.m_quad = _mm_set1_ps( b );
|
||||
return a-bv;
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator+=(float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
a = a + b;
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator+=(float4& a, float b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 bv; bv.m_quad = _mm_set1_ps( b );
|
||||
|
||||
a = a + bv;
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator-=(float4& a, const float4& b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
a = a - b;
|
||||
}
|
||||
|
||||
__inline
|
||||
void operator-=(float4& a, float b)
|
||||
{
|
||||
CHECK_ALIGNMENT(a);
|
||||
|
||||
float4 bv; bv.m_quad = _mm_set1_ps( b );
|
||||
|
||||
a = a - bv;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
__inline
|
||||
float4 cross3(const float4& a, const float4& b)
|
||||
{ // xnamathvector.inl
|
||||
union IntVec
|
||||
{
|
||||
unsigned int m_i[4];
|
||||
__m128 m_v;
|
||||
};
|
||||
|
||||
IntVec mask3 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000};
|
||||
__m128 V1 = a.m_quad;
|
||||
__m128 V2 = b.m_quad;
|
||||
|
||||
__m128 vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1));
|
||||
// z2,x2,y2,w2
|
||||
__m128 vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2));
|
||||
// Perform the left operation
|
||||
__m128 vResult = _mm_mul_ps(vTemp1,vTemp2);
|
||||
// z1,x1,y1,w1
|
||||
vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1));
|
||||
// y2,z2,x2,w2
|
||||
vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2));
|
||||
// Perform the right operation
|
||||
vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
|
||||
// Subract the right from left, and return answer
|
||||
vResult = _mm_sub_ps(vResult,vTemp1);
|
||||
// Set w to zero
|
||||
float4 ans; ans.m_quad = _mm_and_ps(vResult,mask3.m_v);
|
||||
return ans;
|
||||
}
|
||||
|
||||
__inline
|
||||
float dot3F4(const float4& a, const float4& b)
|
||||
{
|
||||
// return a.x*b.x+a.y*b.y+a.z*b.z;
|
||||
// Perform the dot product
|
||||
__m128 V1 = a.m_quad;
|
||||
__m128 V2 = b.m_quad;
|
||||
|
||||
__m128 vDot = _mm_mul_ps(V1,V2);
|
||||
// x=Dot.vector4_f32[1], y=Dot.vector4_f32[2]
|
||||
__m128 vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
|
||||
// Result.vector4_f32[0] = x+y
|
||||
vDot = _mm_add_ss(vDot,vTemp);
|
||||
// x=Dot.vector4_f32[2]
|
||||
vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
|
||||
// Result.vector4_f32[0] = (x+y)+z
|
||||
vDot = _mm_add_ss(vDot,vTemp);
|
||||
// Splat x
|
||||
float4 ans; ans.m_quad = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
|
||||
return ans.x;
|
||||
}
|
||||
|
||||
__inline
|
||||
float length3(const float4& a)
|
||||
{
|
||||
return sqrtf(dot3F4(a,a));
|
||||
}
|
||||
|
||||
__inline
|
||||
float dot4(const float4& a, const float4& b)
|
||||
{
|
||||
return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
|
||||
}
|
||||
|
||||
// for height
|
||||
__inline
|
||||
float dot3w1(const float4& point, const float4& eqn)
|
||||
{
|
||||
return point.x*eqn.x+point.y*eqn.y+point.z*eqn.z+eqn.w;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 normalize3(const float4& a)
|
||||
{
|
||||
float length = sqrtf(dot3F4(a, a));
|
||||
return 1.f/length * a;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 normalize4(const float4& a)
|
||||
{
|
||||
float length = sqrtf(dot4(a, a));
|
||||
return 1.f/length * a;
|
||||
}
|
||||
|
||||
__inline
|
||||
float4 createEquation(const float4& a, const float4& b, const float4& c)
|
||||
{
|
||||
float4 eqn;
|
||||
float4 ab = b-a;
|
||||
float4 ac = c-a;
|
||||
eqn = normalize3( cross3(ab, ac) );
|
||||
eqn.w = -dot3F4(eqn,a);
|
||||
return eqn;
|
||||
}
|
||||
|
||||
|
||||
// Returns a copy of a when a > b, otherwise a copy of b (so b wins on ties).
template<typename T>
__inline
T max2(const T& a, const T& b)
{
	if( a > b ) return a;
	return b;
}
|
||||
|
||||
// Returns a copy of a when a < b, otherwise a copy of b (so b wins on ties).
template<typename T>
__inline
T min2(const T& a, const T& b)
{
	if( a < b ) return a;
	return b;
}
|
||||
|
||||
template<>
|
||||
__inline
|
||||
float4 max2(const float4& a, const float4& b)
|
||||
{
|
||||
return make_float4( max2(a.x,b.x), max2(a.y,b.y), max2(a.z,b.z), max2(a.w,b.w) );
|
||||
}
|
||||
|
||||
template<>
|
||||
__inline
|
||||
float4 min2(const float4& a, const float4& b)
|
||||
{
|
||||
return make_float4( min2(a.x,b.x), min2(a.y,b.y), min2(a.z,b.z), min2(a.w,b.w) );
|
||||
}
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <Adl/Adl.h>
|
||||
//#include <Common/Base/SyncObjects.h>
|
||||
|
||||
#include "AdlMath.h"
|
||||
#include "AdlContact4.h"
|
||||
#include "AdlRigidBody.h"
|
||||
|
||||
#include "../ConvexHeightFieldShape.h"
|
||||
|
||||
//#include "TypeDefinition.h"
|
||||
//#include "RigidBody.h"
|
||||
//#include "ConvexHeightFieldShape.h"
|
||||
|
||||
namespace adl
|
||||
{
|
||||
class ShapeBase;
|
||||
|
||||
// Device-independent base of ChNarrowphase<TYPE>; it only carries the
// settings structure used by the narrowphase entry points.
class ChNarrowphaseBase
{
	public:
		// Settings taken by ChNarrowphase<TYPE>::execute() and culling().
		struct Config
		{
			float m_collisionMargin;	// copied into the kernels' constant data
		};
};
|
||||
|
||||
template<DeviceType TYPE>
|
||||
class ChNarrowphase : public ChNarrowphaseBase
|
||||
{
|
||||
public:
|
||||
typedef Launcher::BufferInfo BufferInfo;
|
||||
|
||||
struct Data
|
||||
{
|
||||
const Device* m_device;
|
||||
Kernel* m_supportCullingKernel;
|
||||
Kernel* m_narrowphaseKernel;
|
||||
Kernel* m_narrowphaseWithPlaneKernel;
|
||||
|
||||
Buffer<u32>* m_counterBuffer;
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
N_TASKS = 4,
|
||||
HEIGHT_RES = ConvexHeightField::HEIGHT_RES,
|
||||
};
|
||||
|
||||
struct ShapeData
|
||||
{
|
||||
float4 m_normal[HEIGHT_RES*HEIGHT_RES*6];
|
||||
u32 m_height4[HEIGHT_RES*HEIGHT_RES*6];
|
||||
u32 m_supportHeight4[HEIGHT_RES*HEIGHT_RES*6];
|
||||
|
||||
float m_scale;
|
||||
float m_padding0;
|
||||
float m_padding1;
|
||||
float m_padding2;
|
||||
};
|
||||
|
||||
struct ConstData
|
||||
{
|
||||
int m_nPairs;
|
||||
float m_collisionMargin;
|
||||
int m_capacity;
|
||||
int m_paddings[1];
|
||||
};
|
||||
|
||||
static
|
||||
Data* allocate( const Device* device );
|
||||
|
||||
static
|
||||
void deallocate( Data* data );
|
||||
/*
|
||||
static
|
||||
Buffer<ShapeData>* allocateShapeBuffer( const Device* device, int capacity );
|
||||
|
||||
static
|
||||
void deallocateShapeBuffer( Buffer<ShapeData>* shapeBuf );
|
||||
|
||||
static
|
||||
void setShape( Buffer<ShapeData>* shapeBuf, ShapeBase* shape, int idx, float collisionMargin );
|
||||
*/
|
||||
static
|
||||
ShapeDataType allocateShapeBuffer( const Device* device, int capacity );
|
||||
|
||||
static
|
||||
void deallocateShapeBuffer( ShapeDataType shapeBuf );
|
||||
|
||||
static
|
||||
void setShape( ShapeDataType shapeBuf, ShapeBase* shape, int idx, float collisionMargin = 0.f );
|
||||
|
||||
static
|
||||
void setShape( ShapeDataType shapeBuf, ConvexHeightField* cvxShape, int idx, float collisionMargin = 0.f );
|
||||
|
||||
// Run NarrowphaseKernel
|
||||
//template<bool USE_OMP>
|
||||
static
|
||||
void execute( Data* data, const Buffer<int2>* pairs, int nPairs,
|
||||
const Buffer<RigidBodyBase::Body>* bodyBuf, const ShapeDataType shapeBuf,
|
||||
Buffer<Contact4>* contactOut, int& nContacts, const Config& cfg );
|
||||
|
||||
// Run NarrowphaseWithPlaneKernel
|
||||
//template<bool USE_OMP>
|
||||
static
|
||||
void execute( Data* data, const Buffer<int2>* pairs, int nPairs,
|
||||
const Buffer<RigidBodyBase::Body>* bodyBuf, const ShapeDataType shapeBuf,
|
||||
const Buffer<float4>* vtxBuf, const Buffer<int4>* idxBuf,
|
||||
Buffer<Contact4>* contactOut, int& nContacts, const Config& cfg );
|
||||
|
||||
// Run SupportCullingKernel
|
||||
//template<bool USE_OMP>
|
||||
static
|
||||
int culling( Data* data, const Buffer<int2>* pairs, int nPairs, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
const ShapeDataType shapeBuf, const Buffer<int2>* pairsOut, const Config& cfg );
|
||||
};
|
||||
|
||||
//#include <AdlPhysics/Narrowphase/ChNarrowphase.inl>
|
||||
//#include <AdlPhysics/Narrowphase/ChNarrowphaseHost.inl>
|
||||
|
||||
#include "ChNarrowphase.inl"
|
||||
|
||||
};
|
||||
@@ -0,0 +1,303 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
//#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\ChNarrowphaseKernels"
|
||||
#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\ChNarrowphaseKernels"
|
||||
#define KERNEL0 "SupportCullingKernel"
|
||||
#define KERNEL1 "NarrowphaseKernel"
|
||||
|
||||
#include "ChNarrowphaseKernels.h"
|
||||
|
||||
class ChNarrowphaseImp
|
||||
{
|
||||
public:
|
||||
static
|
||||
__inline
|
||||
u32 u32Pack(u8 x, u8 y, u8 z, u8 w)
|
||||
{
|
||||
return (x) | (y<<8) | (z<<16) | (w<<24);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<DeviceType TYPE>
|
||||
typename ChNarrowphase<TYPE>::Data* ChNarrowphase<TYPE>::allocate( const Device* device )
|
||||
{
|
||||
char options[100];
|
||||
|
||||
const char* src[] =
|
||||
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
|
||||
{narrowphaseKernelsCL, 0};
|
||||
#else
|
||||
{0,0};
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
//sprintf(options, "-I ..\\..\\ -Wf,--c++");
|
||||
sprintf(options, "-I .\\NarrowPhaseCL\\");
|
||||
|
||||
Data* data = new Data;
|
||||
data->m_device = device;
|
||||
data->m_supportCullingKernel = device->getKernel( PATH, KERNEL0, options,src[TYPE] );
|
||||
data->m_narrowphaseKernel = device->getKernel( PATH, KERNEL1, options, src[TYPE]);
|
||||
data->m_narrowphaseWithPlaneKernel = device->getKernel( PATH, "NarrowphaseWithPlaneKernel", options,src[TYPE]);
|
||||
data->m_counterBuffer = new Buffer<u32>( device, 1 );
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void ChNarrowphase<TYPE>::deallocate( Data* data )
|
||||
{
|
||||
delete data->m_counterBuffer;
|
||||
|
||||
delete data;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
ShapeDataType ChNarrowphase<TYPE>::allocateShapeBuffer( const Device* device, int capacity )
|
||||
{
|
||||
ADLASSERT( device->m_type == TYPE );
|
||||
|
||||
return new Buffer<ShapeData>( device, capacity );
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void ChNarrowphase<TYPE>::deallocateShapeBuffer( ShapeDataType shapeBuf )
|
||||
{
|
||||
Buffer<ShapeData>* s = (Buffer<ShapeData>*)shapeBuf;
|
||||
delete s;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void ChNarrowphase<TYPE>::setShape( ShapeDataType shapeBuf, ShapeBase* shape, int idx, float collisionMargin )
|
||||
{
|
||||
ConvexHeightField* cvxShape = new ConvexHeightField( shape );
|
||||
Buffer<ShapeData>* dst = (Buffer<ShapeData>*)shapeBuf;
|
||||
cvxShape->m_aabb.expandBy( make_float4( collisionMargin ) );
|
||||
{
|
||||
ShapeData s;
|
||||
{
|
||||
for(int j=0; j<HEIGHT_RES*HEIGHT_RES*6; j++)
|
||||
{
|
||||
s.m_normal[j] = cvxShape->m_normal[j];
|
||||
}
|
||||
for(int j=0; j<HEIGHT_RES*HEIGHT_RES*6/4; j++)
|
||||
{
|
||||
s.m_height4[j] = ChNarrowphaseImp::u32Pack( cvxShape->m_data[4*j], cvxShape->m_data[4*j+1], cvxShape->m_data[4*j+2], cvxShape->m_data[4*j+3] );
|
||||
s.m_supportHeight4[j] = ChNarrowphaseImp::u32Pack( cvxShape->m_supportHeight[4*j], cvxShape->m_supportHeight[4*j+1], cvxShape->m_supportHeight[4*j+2], cvxShape->m_supportHeight[4*j+3] );
|
||||
}
|
||||
s.m_scale = cvxShape->m_scale;
|
||||
}
|
||||
dst->write( &s, 1, idx );
|
||||
DeviceUtils::waitForCompletion( dst->m_device );
|
||||
}
|
||||
delete cvxShape;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void ChNarrowphase<TYPE>::setShape( ShapeDataType shapeBuf, ConvexHeightField* cvxShape, int idx, float collisionMargin )
|
||||
{
|
||||
Buffer<ShapeData>* dst = (Buffer<ShapeData>*)shapeBuf;
|
||||
cvxShape->m_aabb.expandBy( make_float4( collisionMargin ) );
|
||||
{
|
||||
ShapeData s;
|
||||
{
|
||||
for(int j=0; j<HEIGHT_RES*HEIGHT_RES*6; j++)
|
||||
{
|
||||
s.m_normal[j] = cvxShape->m_normal[j];
|
||||
}
|
||||
for(int j=0; j<HEIGHT_RES*HEIGHT_RES*6/4; j++)
|
||||
{
|
||||
s.m_height4[j] = ChNarrowphaseImp::u32Pack( cvxShape->m_data[4*j], cvxShape->m_data[4*j+1], cvxShape->m_data[4*j+2], cvxShape->m_data[4*j+3] );
|
||||
s.m_supportHeight4[j] = ChNarrowphaseImp::u32Pack( cvxShape->m_supportHeight[4*j], cvxShape->m_supportHeight[4*j+1], cvxShape->m_supportHeight[4*j+2], cvxShape->m_supportHeight[4*j+3] );
|
||||
}
|
||||
s.m_scale = cvxShape->m_scale;
|
||||
}
|
||||
dst->write( &s, 1, idx );
|
||||
DeviceUtils::waitForCompletion( dst->m_device );
|
||||
}
|
||||
}
|
||||
|
||||
// Run NarrowphaseKernel
|
||||
template<DeviceType TYPE>
|
||||
//template<bool USE_OMP>
|
||||
void ChNarrowphase<TYPE>::execute( Data* data, const Buffer<int2>* pairs, int nPairs, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
const ShapeDataType shapeBuf,
|
||||
Buffer<Contact4>* contactOut, int& nContacts, const Config& cfg )
|
||||
{
|
||||
if( nPairs == 0 ) return;
|
||||
|
||||
Buffer<ShapeData>* shapeBuffer = (Buffer<ShapeData>*)shapeBuf;
|
||||
ADLASSERT( shapeBuffer->getType() == TYPE );
|
||||
|
||||
const Device* device = data->m_device;
|
||||
|
||||
Buffer<int2>* gPairsInNative
|
||||
= BufferUtils::map<TYPE, true>( data->m_device, pairs );
|
||||
Buffer<RigidBodyBase::Body>* gBodyInNative
|
||||
= BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
|
||||
Buffer<Contact4>* gContactOutNative
|
||||
= BufferUtils::map<TYPE, true>( data->m_device, contactOut ); // this might not be empty
|
||||
|
||||
Buffer<ConstData> constBuffer( device, 1, BufferBase::BUFFER_CONST );
|
||||
|
||||
ConstData cdata;
|
||||
cdata.m_nPairs = nPairs;
|
||||
cdata.m_collisionMargin = cfg.m_collisionMargin;
|
||||
cdata.m_capacity = contactOut->getSize() - nContacts;
|
||||
|
||||
u32 n = nContacts;
|
||||
data->m_counterBuffer->write( &n, 1 );
|
||||
// DeviceUtils::waitForCompletion( device );
|
||||
|
||||
{
|
||||
BufferInfo bInfo[] = { BufferInfo( gPairsInNative, true ), BufferInfo( shapeBuffer ), BufferInfo( gBodyInNative ),
|
||||
BufferInfo( gContactOutNative ),
|
||||
BufferInfo( data->m_counterBuffer ) };
|
||||
Launcher launcher( data->m_device, data->m_narrowphaseKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( nPairs*64, 64 );
|
||||
}
|
||||
|
||||
data->m_counterBuffer->read( &n, 1 );
|
||||
DeviceUtils::waitForCompletion( device );
|
||||
|
||||
BufferUtils::unmap<false>( gPairsInNative, pairs );
|
||||
BufferUtils::unmap<false>( gBodyInNative, bodyBuf );
|
||||
BufferUtils::unmap<true>( gContactOutNative, contactOut );
|
||||
|
||||
nContacts = min2((int)n, contactOut->getSize() );
|
||||
}
|
||||
|
||||
// Run NarrowphaseWithPlaneKernel
|
||||
template<DeviceType TYPE>
|
||||
//template<bool USE_OMP>
|
||||
void ChNarrowphase<TYPE>::execute( Data* data, const Buffer<int2>* pairs, int nPairs,
|
||||
const Buffer<RigidBodyBase::Body>* bodyBuf, const ShapeDataType shapeBuf,
|
||||
const Buffer<float4>* vtxBuf, const Buffer<int4>* idxBuf,
|
||||
Buffer<Contact4>* contactOut, int& nContacts, const Config& cfg )
|
||||
{
|
||||
if( nPairs == 0 ) return;
|
||||
|
||||
Buffer<ShapeData>* shapeBuffer = (Buffer<ShapeData>*)shapeBuf;
|
||||
ADLASSERT( shapeBuffer->getType() == TYPE );
|
||||
|
||||
const Device* device = data->m_device;
|
||||
|
||||
Buffer<int2>* gPairsInNative
|
||||
= BufferUtils::map<TYPE, true>( data->m_device, pairs );
|
||||
Buffer<RigidBodyBase::Body>* gBodyInNative
|
||||
= BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
|
||||
Buffer<Contact4>* gContactOutNative
|
||||
= BufferUtils::map<TYPE, true>( data->m_device, contactOut ); // this might not be empty
|
||||
|
||||
Buffer<ConstData> constBuffer( device, 1, BufferBase::BUFFER_CONST );
|
||||
|
||||
ConstData cdata;
|
||||
cdata.m_nPairs = nPairs;
|
||||
cdata.m_collisionMargin = cfg.m_collisionMargin;
|
||||
cdata.m_capacity = contactOut->getSize() - nContacts;
|
||||
|
||||
u32 n = nContacts;
|
||||
data->m_counterBuffer->write( &n, 1 );
|
||||
// DeviceUtils::waitForCompletion( device );
|
||||
|
||||
{
|
||||
BufferInfo bInfo[] = { BufferInfo( gPairsInNative, true ), BufferInfo( shapeBuffer ), BufferInfo( gBodyInNative ),
|
||||
BufferInfo( gContactOutNative ),
|
||||
BufferInfo( data->m_counterBuffer ) };
|
||||
Launcher launcher( data->m_device, data->m_narrowphaseWithPlaneKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( nPairs*64, 64 );
|
||||
}
|
||||
|
||||
data->m_counterBuffer->read( &n, 1 );
|
||||
DeviceUtils::waitForCompletion( device );
|
||||
|
||||
BufferUtils::unmap<false>( gPairsInNative, pairs );
|
||||
BufferUtils::unmap<false>( gBodyInNative, bodyBuf );
|
||||
BufferUtils::unmap<true>( gContactOutNative, contactOut );
|
||||
|
||||
nContacts = min2((int)n, contactOut->getSize() );
|
||||
}
|
||||
|
||||
// Run SupportCullingKernel
|
||||
template<DeviceType TYPE>
|
||||
//template<bool USE_OMP>
|
||||
int ChNarrowphase<TYPE>::culling( Data* data, const Buffer<int2>* pairs, int nPairs, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
const ShapeDataType shapeBuf, const Buffer<int2>* pairsOut, const Config& cfg )
|
||||
{
|
||||
if( nPairs == 0 ) return 0;
|
||||
|
||||
Buffer<ShapeData>* shapeBuffer = (Buffer<ShapeData>*)shapeBuf;
|
||||
ADLASSERT( shapeBuffer->getType() == TYPE );
|
||||
|
||||
const Device* device = data->m_device;
|
||||
|
||||
Buffer<int2>* gPairsInNative
|
||||
= BufferUtils::map<TYPE, true>( data->m_device, pairs );
|
||||
Buffer<RigidBodyBase::Body>* gBodyInNative
|
||||
= BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
|
||||
Buffer<int2>* gPairsOutNative
|
||||
= BufferUtils::map<TYPE, false>( data->m_device, pairsOut );
|
||||
|
||||
//
|
||||
Buffer<ConstData> constBuffer( device, 1, BufferBase::BUFFER_CONST );
|
||||
|
||||
ConstData cdata;
|
||||
cdata.m_nPairs = nPairs;
|
||||
cdata.m_collisionMargin = cfg.m_collisionMargin;
|
||||
cdata.m_capacity = pairsOut->getSize();
|
||||
|
||||
u32 n = 0;
|
||||
data->m_counterBuffer->write( &n, 1 );
|
||||
// DeviceUtils::waitForCompletion( device );
|
||||
{
|
||||
BufferInfo bInfo[] = { BufferInfo( gPairsInNative, true ), BufferInfo( shapeBuffer ), BufferInfo( gBodyInNative ),
|
||||
BufferInfo( gPairsOutNative ), BufferInfo( data->m_counterBuffer ) };
|
||||
Launcher launcher( data->m_device, data->m_supportCullingKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( nPairs, 64 );
|
||||
}
|
||||
data->m_counterBuffer->read( &n, 1 );
|
||||
DeviceUtils::waitForCompletion( device );
|
||||
/*
|
||||
if( gPairsInNative != pairs ) delete gPairsInNative;
|
||||
if( gBodyInNative != bodyBuf ) delete gBodyInNative;
|
||||
if( gPairsOutNative != pairsOut )
|
||||
{
|
||||
gPairsOutNative->read( pairsOut->m_ptr, n );
|
||||
DeviceUtils::waitForCompletion( device );
|
||||
delete gPairsOutNative;
|
||||
}
|
||||
*/
|
||||
BufferUtils::unmap<false>( gPairsInNative, pairs );
|
||||
BufferUtils::unmap<false>( gBodyInNative, bodyBuf );
|
||||
BufferUtils::unmap<true>( gPairsOutNative, pairsOut );
|
||||
|
||||
return min2((int)n, pairsOut->getSize() );
|
||||
}
|
||||
|
||||
#undef PATH
|
||||
#undef KERNEL0
|
||||
#undef KERNEL1
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
203
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.h
Normal file
203
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.h
Normal file
@@ -0,0 +1,203 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#pragma once
|
||||
#ifndef __ADL_SOLVER_H
|
||||
#define __ADL_SOLVER_H
|
||||
|
||||
|
||||
#include <Adl/Adl.h>
|
||||
#include <AdlPrimitives/Math/Math.h>
|
||||
#include <AdlPrimitives/Search/BoundSearch.h>
|
||||
#include <AdlPrimitives/Sort/RadixSort.h>
|
||||
#include <AdlPrimitives/Scan/PrefixScan.h>
|
||||
#include <AdlPrimitives/Sort/RadixSort32.h>
|
||||
|
||||
//#include <AdlPhysics/TypeDefinition.h>
|
||||
#include "AdlRigidBody.h"
|
||||
#include "AdlContact4.h"
|
||||
|
||||
//#include "AdlPhysics/Batching/Batching.h>
|
||||
|
||||
|
||||
#define MYF4 float4
|
||||
#define MAKE_MYF4 make_float4
|
||||
|
||||
//#define MYF4 float4sse
|
||||
//#define MAKE_MYF4 make_float4sse
|
||||
|
||||
#include "AdlConstraint4.h"
|
||||
|
||||
namespace adl
|
||||
{
|
||||
class SolverBase
|
||||
{
|
||||
public:
|
||||
|
||||
|
||||
struct ConstraintData
|
||||
{
|
||||
ConstraintData(): m_b(0.f), m_appliedRambdaDt(0.f) {}
|
||||
|
||||
float4 m_linear; // have to be normalized
|
||||
float4 m_angular0;
|
||||
float4 m_angular1;
|
||||
float m_jacCoeffInv;
|
||||
float m_b;
|
||||
float m_appliedRambdaDt;
|
||||
|
||||
u32 m_bodyAPtr;
|
||||
u32 m_bodyBPtr;
|
||||
|
||||
bool isInvalid() const { return ((u32)m_bodyAPtr+(u32)m_bodyBPtr) == 0; }
|
||||
float getFrictionCoeff() const { return m_linear.w; }
|
||||
void setFrictionCoeff(float coeff) { m_linear.w = coeff; }
|
||||
};
|
||||
|
||||
struct ConstraintCfg
|
||||
{
|
||||
ConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(-1) {}
|
||||
|
||||
float m_positionDrift;
|
||||
float m_positionConstraintCoeff;
|
||||
float m_dt;
|
||||
bool m_enableParallelSolve;
|
||||
float m_averageExtent;
|
||||
int m_staticIdx;
|
||||
};
|
||||
|
||||
static
|
||||
__inline
|
||||
Buffer<Contact4>* allocateContact4( const Device* device, int capacity )
|
||||
{
|
||||
return new Buffer<Contact4>( device, capacity );
|
||||
}
|
||||
|
||||
static
|
||||
__inline
|
||||
void deallocateContact4( Buffer<Contact4>* data ) { delete data; }
|
||||
|
||||
static
|
||||
__inline
|
||||
SolverData allocateConstraint4( const Device* device, int capacity )
|
||||
{
|
||||
return new Buffer<Constraint4>( device, capacity );
|
||||
}
|
||||
|
||||
static
|
||||
__inline
|
||||
void deallocateConstraint4( SolverData data ) { delete (Buffer<Constraint4>*)data; }
|
||||
|
||||
static
|
||||
__inline
|
||||
void* allocateFrictionConstraint( const Device* device, int capacity, u32 type = 0 )
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
__inline
|
||||
void deallocateFrictionConstraint( void* data )
|
||||
{
|
||||
}
|
||||
|
||||
enum
|
||||
{
|
||||
N_SPLIT = 16,
|
||||
N_BATCHES = 4,
|
||||
N_OBJ_PER_SPLIT = 10,
|
||||
N_TASKS_PER_BATCH = N_SPLIT*N_SPLIT,
|
||||
};
|
||||
};
|
||||
|
||||
template<DeviceType TYPE>
|
||||
class Solver : public SolverBase
|
||||
{
|
||||
public:
|
||||
typedef Launcher::BufferInfo BufferInfo;
|
||||
|
||||
struct Data
|
||||
{
|
||||
Data() : m_nIterations(4){}
|
||||
|
||||
const Device* m_device;
|
||||
void* m_parallelSolveData;
|
||||
int m_nIterations;
|
||||
Kernel* m_batchingKernel;
|
||||
Kernel* m_batchSolveKernel;
|
||||
Kernel* m_contactToConstraintKernel;
|
||||
Kernel* m_setSortDataKernel;
|
||||
Kernel* m_reorderContactKernel;
|
||||
Kernel* m_copyConstraintKernel;
|
||||
//typename RadixSort<TYPE>::Data* m_sort;
|
||||
typename RadixSort32<TYPE>::Data* m_sort32;
|
||||
typename BoundSearch<TYPE>::Data* m_search;
|
||||
typename PrefixScan<TYPE>::Data* m_scan;
|
||||
Buffer<SortData>* m_sortDataBuffer;
|
||||
Buffer<Contact4>* m_contactBuffer;
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
DYNAMIC_CONTACT_ALLOCATION_THRESHOLD = 2000000,
|
||||
};
|
||||
|
||||
static
|
||||
Data* allocate( const Device* device, int pairCapacity );
|
||||
|
||||
static
|
||||
void deallocate( Data* data );
|
||||
|
||||
static
|
||||
void reorderConvertToConstraints( Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
||||
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
|
||||
int nContacts, const ConstraintCfg& cfg );
|
||||
|
||||
static
|
||||
void solveContactConstraint( Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* inertiaBuf,
|
||||
SolverData constraint, void* additionalData, int n );
|
||||
|
||||
// static
|
||||
// int createSolveTasks( int batchIdx, Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
||||
// SolverData constraint, int n, ThreadPool::Task* tasksOut[], int taskCapacity );
|
||||
|
||||
|
||||
//private:
|
||||
static
|
||||
void convertToConstraints( Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
||||
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
|
||||
int nContacts, const ConstraintCfg& cfg );
|
||||
|
||||
static
|
||||
void sortContacts( Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
Buffer<Contact4>* contactsIn, void* additionalData,
|
||||
int nContacts, const ConstraintCfg& cfg );
|
||||
|
||||
static
|
||||
void batchContacts( Data* data, Buffer<Contact4>* contacts, int nContacts, Buffer<u32>* n, Buffer<u32>* offsets, int staticIdx );
|
||||
|
||||
};
|
||||
|
||||
#include "Solver.inl"
|
||||
#include "SolverHost.inl"
|
||||
};
|
||||
|
||||
#undef MYF4
|
||||
#undef MAKE_MYF4
|
||||
|
||||
#endif //__ADL_SOLVER_H
|
||||
762
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.inl
Normal file
762
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.inl
Normal file
@@ -0,0 +1,762 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\SolverKernels"
|
||||
#define BATCHING_PATH "..\\..\\dynamics\\basic_demo\\Stubs\\batchingKernels"
|
||||
|
||||
#define KERNEL1 "SingleBatchSolveKernel"
|
||||
#define KERNEL2 "BatchSolveKernel"
|
||||
|
||||
#define KERNEL3 "ContactToConstraintKernel"
|
||||
#define KERNEL4 "SetSortDataKernel"
|
||||
#define KERNEL5 "ReorderContactKernel"
|
||||
#include "SolverKernels.h"
|
||||
|
||||
#include "batchingKernels.h"
|
||||
|
||||
|
||||
// Plain record of sixteen integer and four float debug values.
struct SolverDebugInfo
{
	int m_valInt0, m_valInt1, m_valInt2, m_valInt3;
	int m_valInt4, m_valInt5, m_valInt6, m_valInt7;
	int m_valInt8, m_valInt9, m_valInt10, m_valInt11;
	int m_valInt12, m_valInt13, m_valInt14, m_valInt15;

	float m_val0, m_val1, m_val2, m_val3;
};
|
||||
|
||||
|
||||
|
||||
|
||||
class SolverDeviceInl
|
||||
{
|
||||
public:
|
||||
struct ParallelSolveData
|
||||
{
|
||||
Buffer<u32>* m_numConstraints;
|
||||
Buffer<u32>* m_offsets;
|
||||
};
|
||||
};
|
||||
|
||||
template<DeviceType TYPE>
|
||||
typename Solver<TYPE>::Data* Solver<TYPE>::allocate( const Device* device, int pairCapacity )
|
||||
{
|
||||
const char* src[] =
|
||||
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
|
||||
{solverKernelsCL, 0};
|
||||
#else
|
||||
{0,0};
|
||||
#endif
|
||||
|
||||
const char* src2[] =
|
||||
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
|
||||
{batchingKernelsCL, 0};
|
||||
#else
|
||||
{0,0};
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
Data* data = new Data;
|
||||
data->m_device = device;
|
||||
bool cacheBatchingKernel = true;
|
||||
data->m_batchingKernel = device->getKernel( BATCHING_PATH, "CreateBatches", "-I ..\\..\\ ", src2[TYPE],cacheBatchingKernel);
|
||||
//data->m_batchingKernel = device->getKernel( BATCHING_PATH, "CreateBatches", "-I ..\\..\\ ", 0,cacheBatchingKernel);
|
||||
bool cacheSolverKernel = true;
|
||||
|
||||
data->m_batchSolveKernel = device->getKernel( PATH, KERNEL2, "-I ..\\..\\ ", src[TYPE],cacheSolverKernel );
|
||||
data->m_contactToConstraintKernel = device->getKernel( PATH, KERNEL3,
|
||||
"-I ..\\..\\ ", src[TYPE] );
|
||||
data->m_setSortDataKernel = device->getKernel( PATH, KERNEL4,
|
||||
"-I ..\\..\\ ", src[TYPE] );
|
||||
data->m_reorderContactKernel = device->getKernel( PATH, KERNEL5,
|
||||
"-I ..\\..\\ ", src[TYPE] );
|
||||
|
||||
data->m_copyConstraintKernel = device->getKernel( PATH, "CopyConstraintKernel",
|
||||
"-I ..\\..\\ ", src[TYPE] );
|
||||
|
||||
data->m_parallelSolveData = new SolverDeviceInl::ParallelSolveData;
|
||||
{
|
||||
SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
solveData->m_numConstraints = new Buffer<u32>( device, N_SPLIT*N_SPLIT );
|
||||
solveData->m_offsets = new Buffer<u32>( device, N_SPLIT*N_SPLIT );
|
||||
}
|
||||
const int sortSize = NEXTMULTIPLEOF( pairCapacity, 512 );
|
||||
|
||||
|
||||
//data->m_sort = RadixSort<TYPE>::allocate( data->m_device, sortSize );//todo. remove hardcode this
|
||||
data->m_sort32 = RadixSort32<TYPE>::allocate( data->m_device, sortSize );//todo. remove hardcode this
|
||||
|
||||
data->m_search = BoundSearch<TYPE>::allocate( data->m_device, N_SPLIT*N_SPLIT );
|
||||
data->m_scan = PrefixScan<TYPE>::allocate( data->m_device, N_SPLIT*N_SPLIT );
|
||||
|
||||
data->m_sortDataBuffer = new Buffer<SortData>( data->m_device, sortSize );
|
||||
|
||||
if( pairCapacity < DYNAMIC_CONTACT_ALLOCATION_THRESHOLD )
|
||||
data->m_contactBuffer = new Buffer<Contact4>( data->m_device, pairCapacity );
|
||||
else
|
||||
data->m_contactBuffer = 0;
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::deallocate( Data* data )
|
||||
{
|
||||
{
|
||||
SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
delete solveData->m_numConstraints;
|
||||
delete solveData->m_offsets;
|
||||
delete solveData;
|
||||
}
|
||||
|
||||
// RadixSort<TYPE>::deallocate( data->m_sort );
|
||||
RadixSort32<TYPE>::deallocate(data->m_sort32);
|
||||
BoundSearch<TYPE>::deallocate( data->m_search );
|
||||
PrefixScan<TYPE>::deallocate( data->m_scan );
|
||||
|
||||
delete data->m_sortDataBuffer;
|
||||
if( data->m_contactBuffer ) delete data->m_contactBuffer;
|
||||
|
||||
delete data;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::reorderConvertToConstraints( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
||||
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
|
||||
int nContacts, const typename Solver<TYPE>::ConstraintCfg& cfg )
|
||||
{
|
||||
if( data->m_contactBuffer )
|
||||
{
|
||||
if( data->m_contactBuffer->getSize() < nContacts )
|
||||
{
|
||||
BT_PROFILE("delete data->m_contactBuffer;");
|
||||
delete data->m_contactBuffer;
|
||||
data->m_contactBuffer = 0;
|
||||
}
|
||||
}
|
||||
if( data->m_contactBuffer == 0 )
|
||||
{
|
||||
BT_PROFILE("new data->m_contactBuffer;");
|
||||
|
||||
data->m_contactBuffer = new Buffer<Contact4>( data->m_device, nContacts );
|
||||
}
|
||||
Stopwatch sw;
|
||||
|
||||
Buffer<Contact4>* contactNative = BufferUtils::map<TYPE_CL, true>( data->m_device, contactsIn, nContacts );
|
||||
|
||||
//DeviceUtils::Config dhCfg;
|
||||
//Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
||||
if( cfg.m_enableParallelSolve )
|
||||
{
|
||||
SolverDeviceInl::ParallelSolveData* nativeSolveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
sw.start();
|
||||
// contactsIn -> data->m_contactBuffer
|
||||
{
|
||||
BT_PROFILE("sortContacts");
|
||||
Solver<TYPE>::sortContacts( data, bodyBuf, contactNative, additionalData, nContacts, cfg );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
sw.split();
|
||||
if(0)
|
||||
{
|
||||
Contact4* tmp = new Contact4[nContacts];
|
||||
data->m_contactBuffer->read( tmp, nContacts );
|
||||
DeviceUtils::waitForCompletion( data->m_contactBuffer->m_device );
|
||||
contactNative->write( tmp, nContacts );
|
||||
DeviceUtils::waitForCompletion( contactNative->m_device );
|
||||
delete [] tmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
BT_PROFILE("m_copyConstraintKernel");
|
||||
|
||||
Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
||||
|
||||
int4 cdata; cdata.x = nContacts;
|
||||
BufferInfo bInfo[] = { BufferInfo( data->m_contactBuffer ), BufferInfo( contactNative ) };
|
||||
// Launcher launcher( data->m_device, data->m_device->getKernel( PATH, "CopyConstraintKernel", "-I ..\\..\\ -Wf,--c++", 0 ) );
|
||||
Launcher launcher( data->m_device, data->m_copyConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
{
|
||||
BT_PROFILE("batchContacts");
|
||||
Solver<TYPE>::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, cfg.m_staticIdx );
|
||||
|
||||
}
|
||||
}
|
||||
{
|
||||
BT_PROFILE("waitForCompletion (batchContacts)");
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
sw.split();
|
||||
//================
|
||||
if(0)
|
||||
{
|
||||
// Solver<TYPE_HOST>::Data* solverHost = Solver<TYPE_HOST>::allocate( deviceHost, nContacts );
|
||||
// Solver<TYPE_HOST>::convertToConstraints( solverHost, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, cfg );
|
||||
// Solver<TYPE_HOST>::deallocate( solverHost );
|
||||
}
|
||||
else
|
||||
{
|
||||
BT_PROFILE("convertToConstraints");
|
||||
Solver<TYPE>::convertToConstraints( data, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, cfg );
|
||||
}
|
||||
{
|
||||
BT_PROFILE("convertToConstraints waitForCompletion");
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
sw.stop();
|
||||
|
||||
{
|
||||
BT_PROFILE("printf");
|
||||
|
||||
float t[5];
|
||||
sw.getMs( t, 3 );
|
||||
// printf("%3.2f, %3.2f, %3.2f, ", t[0], t[1], t[2]);
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("deallocate and unmap");
|
||||
|
||||
//DeviceUtils::deallocate( deviceHost );
|
||||
|
||||
BufferUtils::unmap<true>( contactNative, contactsIn, nContacts );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::solveContactConstraint( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
||||
SolverData constraint, void* additionalData, int n )
|
||||
{
|
||||
if(0)
|
||||
{
|
||||
DeviceUtils::Config dhCfg;
|
||||
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
||||
{
|
||||
Solver<TYPE_HOST>::Data* hostData = Solver<TYPE_HOST>::allocate( deviceHost, 0 );
|
||||
Solver<TYPE_HOST>::solveContactConstraint( hostData, bodyBuf, shapeBuf, constraint, additionalData, n );
|
||||
Solver<TYPE_HOST>::deallocate( hostData );
|
||||
}
|
||||
DeviceUtils::deallocate( deviceHost );
|
||||
return;
|
||||
}
|
||||
|
||||
ADLASSERT( data );
|
||||
|
||||
Buffer<Constraint4>* cBuffer =0;
|
||||
|
||||
Buffer<RigidBodyBase::Body>* gBodyNative=0;
|
||||
Buffer<RigidBodyBase::Inertia>* gShapeNative =0;
|
||||
Buffer<Constraint4>* gConstraintNative =0;
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("map");
|
||||
cBuffer = (Buffer<Constraint4>*)constraint;
|
||||
|
||||
gBodyNative= BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
|
||||
gShapeNative= BufferUtils::map<TYPE, true>( data->m_device, shapeBuf );
|
||||
gConstraintNative = BufferUtils::map<TYPE, true>( data->m_device, cBuffer );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
|
||||
Buffer<int4> constBuffer;
|
||||
int4 cdata = make_int4( n, 0, 0, 0 );
|
||||
{
|
||||
SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
const int nn = N_SPLIT*N_SPLIT;
|
||||
|
||||
cdata.x = 0;
|
||||
cdata.y = 250;
|
||||
|
||||
#if 0
|
||||
//check how the cells are filled
|
||||
unsigned int* hostCounts = new unsigned int[N_SPLIT*N_SPLIT];
|
||||
solveData->m_numConstraints->read(hostCounts,N_SPLIT*N_SPLIT);
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
for (int i=0;i<N_SPLIT*N_SPLIT;i++)
|
||||
{
|
||||
if (hostCounts[i])
|
||||
{
|
||||
printf("hostCounts[%d]=%d\n",i,hostCounts[i]);
|
||||
}
|
||||
}
|
||||
delete[] hostCounts;
|
||||
#endif
|
||||
|
||||
int numWorkItems = 64*nn/N_BATCHES;
|
||||
#ifdef DEBUG_ME
|
||||
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
||||
adl::Buffer<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
{
|
||||
|
||||
BT_PROFILE("m_batchSolveKernel iterations");
|
||||
for(int iter=0; iter<data->m_nIterations; iter++)
|
||||
{
|
||||
for(int ib=0; ib<N_BATCHES; ib++)
|
||||
{
|
||||
#ifdef DEBUG_ME
|
||||
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
||||
gpuDebugInfo.write(debugInfo,numWorkItems);
|
||||
#endif
|
||||
|
||||
|
||||
cdata.z = ib;
|
||||
cdata.w = N_SPLIT;
|
||||
|
||||
|
||||
|
||||
BufferInfo bInfo[] = {
|
||||
|
||||
BufferInfo( gBodyNative ),
|
||||
BufferInfo( gShapeNative ),
|
||||
BufferInfo( gConstraintNative ),
|
||||
BufferInfo( solveData->m_numConstraints ),
|
||||
BufferInfo( solveData->m_offsets )
|
||||
#ifdef DEBUG_ME
|
||||
, BufferInfo(&gpuDebugInfo)
|
||||
#endif
|
||||
};
|
||||
|
||||
Launcher launcher( data->m_device, data->m_batchSolveKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
|
||||
launcher.launch1D( numWorkItems, 64 );
|
||||
|
||||
#ifdef DEBUG_ME
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
gpuDebugInfo.read(debugInfo,numWorkItems);
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
for (int i=0;i<numWorkItems;i++)
|
||||
{
|
||||
if (debugInfo[i].m_valInt2>0)
|
||||
{
|
||||
printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2);
|
||||
}
|
||||
|
||||
if (debugInfo[i].m_valInt3>0)
|
||||
{
|
||||
printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3);
|
||||
}
|
||||
}
|
||||
#endif //DEBUG_ME
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
|
||||
}
|
||||
|
||||
cdata.x = 1;
|
||||
{
|
||||
BT_PROFILE("m_batchSolveKernel iterations2");
|
||||
for(int iter=0; iter<data->m_nIterations; iter++)
|
||||
{
|
||||
for(int ib=0; ib<N_BATCHES; ib++)
|
||||
{
|
||||
cdata.z = ib;
|
||||
cdata.w = N_SPLIT;
|
||||
|
||||
BufferInfo bInfo[] = {
|
||||
BufferInfo( gBodyNative ),
|
||||
BufferInfo( gShapeNative ),
|
||||
BufferInfo( gConstraintNative ),
|
||||
BufferInfo( solveData->m_numConstraints ),
|
||||
BufferInfo( solveData->m_offsets )
|
||||
#ifdef DEBUG_ME
|
||||
,BufferInfo(&gpuDebugInfo)
|
||||
#endif //DEBUG_ME
|
||||
};
|
||||
Launcher launcher( data->m_device, data->m_batchSolveKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( 64*nn/N_BATCHES, 64 );
|
||||
}
|
||||
}
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
}
|
||||
#ifdef DEBUG_ME
|
||||
delete[] debugInfo;
|
||||
#endif //DEBUG_ME
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("unmap");
|
||||
BufferUtils::unmap<true>( gBodyNative, bodyBuf );
|
||||
BufferUtils::unmap<false>( gShapeNative, shapeBuf );
|
||||
BufferUtils::unmap<true>( gConstraintNative, cBuffer );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::convertToConstraints( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
||||
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
|
||||
int nContacts, const ConstraintCfg& cfg )
|
||||
{
|
||||
ADLASSERT( data->m_device->m_type == TYPE_CL );
|
||||
|
||||
Buffer<RigidBodyBase::Body>* bodyNative =0;
|
||||
Buffer<RigidBodyBase::Inertia>* shapeNative =0;
|
||||
Buffer<Contact4>* contactNative =0;
|
||||
Buffer<Constraint4>* constraintNative =0;
|
||||
|
||||
{
|
||||
BT_PROFILE("map buffers");
|
||||
|
||||
bodyNative = BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
|
||||
shapeNative = BufferUtils::map<TYPE, true>( data->m_device, shapeBuf );
|
||||
contactNative= BufferUtils::map<TYPE, true>( data->m_device, contactsIn );
|
||||
constraintNative = BufferUtils::map<TYPE, false>( data->m_device, (Buffer<Constraint4>*)contactCOut );
|
||||
}
|
||||
struct CB
|
||||
{
|
||||
int m_nContacts;
|
||||
float m_dt;
|
||||
float m_positionDrift;
|
||||
float m_positionConstraintCoeff;
|
||||
};
|
||||
|
||||
{
|
||||
BT_PROFILE("m_contactToConstraintKernel");
|
||||
CB cdata;
|
||||
cdata.m_nContacts = nContacts;
|
||||
cdata.m_dt = cfg.m_dt;
|
||||
cdata.m_positionDrift = cfg.m_positionDrift;
|
||||
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
|
||||
|
||||
Buffer<CB> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
||||
BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( bodyNative ), BufferInfo( shapeNative ),
|
||||
BufferInfo( constraintNative )};
|
||||
Launcher launcher( data->m_device, data->m_contactToConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("unmap");
|
||||
BufferUtils::unmap<false>( bodyNative, bodyBuf );
|
||||
BufferUtils::unmap<false>( shapeNative, shapeBuf );
|
||||
BufferUtils::unmap<false>( contactNative, contactsIn );
|
||||
BufferUtils::unmap<true>( constraintNative, (Buffer<Constraint4>*)contactCOut );
|
||||
}
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::sortContacts( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
Buffer<Contact4>* contactsIn, void* additionalData,
|
||||
int nContacts, const typename Solver<TYPE>::ConstraintCfg& cfg )
|
||||
{
|
||||
ADLASSERT( data->m_device->m_type == TYPE_CL );
|
||||
Buffer<RigidBodyBase::Body>* bodyNative
|
||||
= BufferUtils::map<TYPE_CL, true>( data->m_device, bodyBuf );
|
||||
Buffer<Contact4>* contactNative
|
||||
= BufferUtils::map<TYPE_CL, true>( data->m_device, contactsIn );
|
||||
|
||||
const int sortAlignment = 512; // todo. get this out of sort
|
||||
if( cfg.m_enableParallelSolve )
|
||||
{
|
||||
SolverDeviceInl::ParallelSolveData* nativeSolveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
|
||||
int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment );
|
||||
|
||||
Buffer<u32>* countsNative = nativeSolveData->m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost );
|
||||
Buffer<u32>* offsetsNative = nativeSolveData->m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost );
|
||||
|
||||
{ // 2. set cell idx
|
||||
struct CB
|
||||
{
|
||||
int m_nContacts;
|
||||
int m_staticIdx;
|
||||
float m_scale;
|
||||
int m_nSplit;
|
||||
};
|
||||
|
||||
ADLASSERT( sortSize%64 == 0 );
|
||||
CB cdata;
|
||||
cdata.m_nContacts = nContacts;
|
||||
cdata.m_staticIdx = cfg.m_staticIdx;
|
||||
cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent);
|
||||
cdata.m_nSplit = N_SPLIT;
|
||||
|
||||
Buffer<CB> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
||||
BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( bodyNative ), BufferInfo( data->m_sortDataBuffer ) };
|
||||
Launcher launcher( data->m_device, data->m_setSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( sortSize, 64 );
|
||||
}
|
||||
|
||||
{ // 3. sort by cell idx
|
||||
int n = N_SPLIT*N_SPLIT;
|
||||
int sortBit = 32;
|
||||
//if( n <= 0xffff ) sortBit = 16;
|
||||
//if( n <= 0xff ) sortBit = 8;
|
||||
RadixSort32<TYPE>::execute( data->m_sort32, *data->m_sortDataBuffer,sortSize);
|
||||
}
|
||||
{ // 4. find entries
|
||||
BoundSearch<TYPE>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, N_SPLIT*N_SPLIT, BoundSearchBase::COUNT );
|
||||
|
||||
PrefixScan<TYPE>::execute( data->m_scan, *countsNative, *offsetsNative, N_SPLIT*N_SPLIT );
|
||||
}
|
||||
|
||||
{ // 5. sort constraints by cellIdx
|
||||
// todo. preallocate this
|
||||
// ADLASSERT( contactsIn->getType() == TYPE_HOST );
|
||||
// Buffer<Contact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn ); // copying contacts to this buffer
|
||||
|
||||
{
|
||||
Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
||||
|
||||
int4 cdata; cdata.x = nContacts;
|
||||
BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( data->m_contactBuffer ), BufferInfo( data->m_sortDataBuffer ) };
|
||||
Launcher launcher( data->m_device, data->m_reorderContactKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
}
|
||||
// BufferUtils::unmap<true>( out, contactsIn, nContacts );
|
||||
}
|
||||
}
|
||||
|
||||
BufferUtils::unmap<false>( bodyNative, bodyBuf );
|
||||
BufferUtils::unmap<false>( contactNative, contactsIn );
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::batchContacts( typename Solver<TYPE>::Data* data, Buffer<Contact4>* contacts, int nContacts, Buffer<u32>* n, Buffer<u32>* offsets, int staticIdx )
|
||||
{
|
||||
ADLASSERT( data->m_device->m_type == TYPE_CL );
|
||||
|
||||
if(0)
|
||||
{
|
||||
BT_PROFILE("CPU classTestKernel/Kernel (batch generation?)");
|
||||
|
||||
DeviceUtils::Config dhCfg;
|
||||
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
||||
{
|
||||
Solver<TYPE_HOST>::Data* hostData = Solver<TYPE_HOST>::allocate( deviceHost, 0 );
|
||||
Solver<TYPE_HOST>::batchContacts( hostData, contacts, nContacts, n, offsets, staticIdx );
|
||||
Solver<TYPE_HOST>::deallocate( hostData );
|
||||
}
|
||||
DeviceUtils::deallocate( deviceHost );
|
||||
return;
|
||||
}
|
||||
|
||||
Buffer<Contact4>* contactNative
|
||||
= BufferUtils::map<TYPE_CL, true>( data->m_device, contacts, nContacts );
|
||||
Buffer<u32>* nNative
|
||||
= BufferUtils::map<TYPE_CL, true>( data->m_device, n );
|
||||
Buffer<u32>* offsetsNative
|
||||
= BufferUtils::map<TYPE_CL, true>( data->m_device, offsets );
|
||||
|
||||
{
|
||||
BT_PROFILE("GPU classTestKernel/Kernel (batch generation?)");
|
||||
Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
||||
int4 cdata;
|
||||
cdata.x = nContacts;
|
||||
cdata.y = 0;
|
||||
cdata.z = staticIdx;
|
||||
|
||||
int numWorkItems = 64*N_SPLIT*N_SPLIT;
|
||||
#ifdef BATCH_DEBUG
|
||||
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
||||
adl::Buffer<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
||||
gpuDebugInfo.write(debugInfo,numWorkItems);
|
||||
#endif
|
||||
|
||||
|
||||
BufferInfo bInfo[] = {
|
||||
BufferInfo( contactNative ),
|
||||
BufferInfo( data->m_contactBuffer ),
|
||||
BufferInfo( nNative ),
|
||||
BufferInfo( offsetsNative )
|
||||
#ifdef BATCH_DEBUG
|
||||
, BufferInfo(&gpuDebugInfo)
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
||||
Launcher launcher( data->m_device, data->m_batchingKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( numWorkItems, 64 );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
#ifdef BATCH_DEBUG
|
||||
aaaa
|
||||
Contact4* hostContacts = new Contact4[nContacts];
|
||||
data->m_contactBuffer->read(hostContacts,nContacts);
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
gpuDebugInfo.read(debugInfo,numWorkItems);
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
for (int i=0;i<numWorkItems;i++)
|
||||
{
|
||||
if (debugInfo[i].m_valInt1>0)
|
||||
{
|
||||
printf("catch\n");
|
||||
}
|
||||
if (debugInfo[i].m_valInt2>0)
|
||||
{
|
||||
printf("catch22\n");
|
||||
}
|
||||
|
||||
if (debugInfo[i].m_valInt3>0)
|
||||
{
|
||||
printf("catch666\n");
|
||||
}
|
||||
|
||||
if (debugInfo[i].m_valInt4>0)
|
||||
{
|
||||
printf("catch777\n");
|
||||
}
|
||||
}
|
||||
delete[] debugInfo;
|
||||
#endif //BATCH_DEBUG
|
||||
|
||||
}
|
||||
|
||||
if(0)
|
||||
{
|
||||
u32* nhost = new u32[N_SPLIT*N_SPLIT];
|
||||
|
||||
nNative->read( nhost, N_SPLIT*N_SPLIT );
|
||||
|
||||
Contact4* chost = new Contact4[nContacts];
|
||||
data->m_contactBuffer->read( chost, nContacts );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
printf(">>");
|
||||
int nonzero = 0;
|
||||
u32 maxn = 0;
|
||||
for(int i=0; i<N_SPLIT*N_SPLIT; i++)
|
||||
{
|
||||
printf("%d-", nhost[i]);
|
||||
nonzero += (nhost[i]==0)? 0:1;
|
||||
maxn = max2( nhost[i], maxn );
|
||||
}
|
||||
printf("\nnonzero:zero = %d:%d (%d)\n", nonzero, N_SPLIT*N_SPLIT-nonzero, maxn);
|
||||
printf("\n\n");
|
||||
|
||||
int prev = 0;
|
||||
int prevIdx = 0;
|
||||
int maxNBatches = 0;
|
||||
for(int i=0; i<nContacts; i++)
|
||||
{
|
||||
// printf("(%d, %d:%d),", chost[i].m_batchIdx, chost[i].m_bodyAPtr, chost[i].m_bodyBPtr);
|
||||
if( prev != 0 && chost[i].m_batchIdx == 0 )
|
||||
{
|
||||
maxNBatches = max2( maxNBatches, prev );
|
||||
printf("\n[%d]", prev);
|
||||
|
||||
//for(int j=prevIdx; j<i; j++)
|
||||
//{
|
||||
// printf("(%d:%d),", chost[j].m_bodyAPtr, chost[j].m_bodyBPtr);
|
||||
//}
|
||||
|
||||
//printf("\n");
|
||||
|
||||
prevIdx = i;
|
||||
}
|
||||
|
||||
printf("%d,", chost[i].m_batchIdx);
|
||||
|
||||
prev = chost[i].m_batchIdx;
|
||||
}
|
||||
printf("\n");
|
||||
printf("Max: %d\n", maxNBatches);
|
||||
|
||||
delete [] chost;
|
||||
delete [] nhost;
|
||||
}
|
||||
// copy buffer to buffer
|
||||
contactNative->write( *data->m_contactBuffer, nContacts );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
if(0)
|
||||
{
|
||||
DeviceUtils::Config dhCfg;
|
||||
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
||||
{
|
||||
HostBuffer<Contact4> host( deviceHost, nContacts );
|
||||
contactNative->read( host.m_ptr, nContacts );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
for(int i=0; i<nContacts; i++)
|
||||
{
|
||||
ADLASSERT( host[i].m_bodyAPtr <= (u32)staticIdx );
|
||||
ADLASSERT( host[i].m_bodyBPtr <= (u32)staticIdx );
|
||||
}
|
||||
}
|
||||
DeviceUtils::deallocate( deviceHost );
|
||||
}
|
||||
|
||||
BufferUtils::unmap<true>( contactNative, contacts );
|
||||
BufferUtils::unmap<false>( nNative, n );
|
||||
BufferUtils::unmap<false>( offsetsNative, offsets );
|
||||
}
|
||||
|
||||
#undef PATH
|
||||
#undef KERNEL1
|
||||
#undef KERNEL2
|
||||
|
||||
#undef KERNEL3
|
||||
#undef KERNEL4
|
||||
#undef KERNEL5
|
||||
@@ -0,0 +1,848 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
class SolverInl
|
||||
{
|
||||
public:
|
||||
typedef SolverBase::ConstraintData ConstraintData;
|
||||
|
||||
|
||||
static
|
||||
__forceinline
|
||||
void setLinearAndAngular(const MYF4& n, const MYF4& r0, const MYF4& r1,
|
||||
MYF4& linear, MYF4& angular0, MYF4& angular1)
|
||||
{
|
||||
linear = -n;
|
||||
angular0 = -cross3(r0, n);
|
||||
angular1 = cross3(r1, n);
|
||||
}
|
||||
|
||||
static
|
||||
__forceinline
|
||||
float calcJacCoeff(const MYF4& linear0, const MYF4& linear1, const MYF4& angular0, const MYF4& angular1,
|
||||
float invMass0, const Matrix3x3& invInertia0, float invMass1, const Matrix3x3& invInertia1)
|
||||
{
|
||||
// linear0,1 are normlized
|
||||
float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;
|
||||
float jmj1 = dot3F4(mtMul3(angular0,invInertia0), angular0);
|
||||
float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;
|
||||
float jmj3 = dot3F4(mtMul3(angular1,invInertia1), angular1);
|
||||
return -1.f/(jmj0+jmj1+jmj2+jmj3);
|
||||
}
|
||||
static
|
||||
__forceinline
|
||||
float calcRelVel(const MYF4& l0, const MYF4& l1, const MYF4& a0, const MYF4& a1,
|
||||
const MYF4& linVel0, const MYF4& angVel0, const MYF4& linVel1, const MYF4& angVel1)
|
||||
{
|
||||
return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);
|
||||
}
|
||||
|
||||
static
|
||||
__forceinline
|
||||
void setConstraint4( const MYF4& posA, const MYF4& linVelA, const MYF4& angVelA, float invMassA, const Matrix3x3& invInertiaA,
|
||||
const MYF4& posB, const MYF4& linVelB, const MYF4& angVelB, float invMassB, const Matrix3x3& invInertiaB,
|
||||
const Contact4& src, const SolverBase::ConstraintCfg& cfg,
|
||||
Constraint4& dstC )
|
||||
{
|
||||
dstC.m_bodyA = (u32)src.m_bodyAPtr;
|
||||
dstC.m_bodyB = (u32)src.m_bodyBPtr;
|
||||
|
||||
float dtInv = 1.f/cfg.m_dt;
|
||||
for(int ic=0; ic<4; ic++)
|
||||
{
|
||||
dstC.m_appliedRambdaDt[ic] = 0.f;
|
||||
}
|
||||
dstC.m_fJacCoeffInv[0] = dstC.m_fJacCoeffInv[1] = 0.f;
|
||||
|
||||
|
||||
const MYF4& n = src.m_worldNormal;
|
||||
dstC.m_linear = -n;
|
||||
dstC.setFrictionCoeff( src.getFrictionCoeff() );
|
||||
for(int ic=0; ic<4; ic++)
|
||||
{
|
||||
MYF4 r0 = src.m_worldPos[ic] - posA;
|
||||
MYF4 r1 = src.m_worldPos[ic] - posB;
|
||||
|
||||
if( ic >= src.getNPoints() )
|
||||
{
|
||||
dstC.m_jacCoeffInv[ic] = 0.f;
|
||||
continue;
|
||||
}
|
||||
|
||||
float relVelN;
|
||||
{
|
||||
MYF4 linear, angular0, angular1;
|
||||
setLinearAndAngular(n, r0, r1, linear, angular0, angular1);
|
||||
|
||||
dstC.m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
|
||||
invMassA, invInertiaA, invMassB, invInertiaB );
|
||||
|
||||
relVelN = calcRelVel(linear, -linear, angular0, angular1,
|
||||
linVelA, angVelA, linVelB, angVelB);
|
||||
|
||||
float e = src.getRestituitionCoeff();
|
||||
if( relVelN*relVelN < 0.004f ) e = 0.f;
|
||||
|
||||
dstC.m_b[ic] = e*relVelN;
|
||||
dstC.m_b[ic] += (src.getPenetration(ic) + cfg.m_positionDrift)*cfg.m_positionConstraintCoeff*dtInv;
|
||||
dstC.m_appliedRambdaDt[ic] = 0.f;
|
||||
}
|
||||
}
|
||||
|
||||
if( src.getNPoints() > 1 )
|
||||
{ // prepare friction
|
||||
MYF4 center = MAKE_MYF4(0.f);
|
||||
for(int i=0; i<src.getNPoints(); i++) center += src.m_worldPos[i];
|
||||
center /= (float)src.getNPoints();
|
||||
|
||||
MYF4 tangent[2];
|
||||
tangent[0] = cross3( src.m_worldNormal, src.m_worldPos[0]-center );
|
||||
tangent[1] = cross3( tangent[0], src.m_worldNormal );
|
||||
tangent[0] = normalize3( tangent[0] );
|
||||
tangent[1] = normalize3( tangent[1] );
|
||||
MYF4 r[2];
|
||||
r[0] = center - posA;
|
||||
r[1] = center - posB;
|
||||
|
||||
for(int i=0; i<2; i++)
|
||||
{
|
||||
MYF4 linear, angular0, angular1;
|
||||
setLinearAndAngular(tangent[i], r[0], r[1], linear, angular0, angular1);
|
||||
|
||||
dstC.m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
|
||||
invMassA, invInertiaA, invMassB, invInertiaB );
|
||||
dstC.m_fAppliedRambdaDt[i] = 0.f;
|
||||
}
|
||||
dstC.m_center = center;
|
||||
}
|
||||
else
|
||||
{
|
||||
// single point constraint
|
||||
}
|
||||
|
||||
for(int i=0; i<4; i++)
|
||||
{
|
||||
if( i<src.getNPoints() )
|
||||
{
|
||||
dstC.m_worldPos[i] = src.m_worldPos[i];
|
||||
}
|
||||
else
|
||||
{
|
||||
dstC.m_worldPos[i] = MAKE_MYF4(0.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
struct Constraint4
|
||||
{
|
||||
float4 m_linear; X
|
||||
float4 m_angular0[4]; X
|
||||
float4 m_angular1[4]; center
|
||||
float m_jacCoeffInv[4]; [0,1]
|
||||
float m_b[4]; X
|
||||
float m_appliedRambdaDt[4]; [0,1]
|
||||
|
||||
void* m_bodyAPtr; X
|
||||
void* m_bodyBPtr; X
|
||||
};
|
||||
*/
|
||||
static
|
||||
__inline
|
||||
void solveFriction(Constraint4& cs,
|
||||
const MYF4& posA, MYF4& linVelA, MYF4& angVelA, float invMassA, const Matrix3x3& invInertiaA,
|
||||
const MYF4& posB, MYF4& linVelB, MYF4& angVelB, float invMassB, const Matrix3x3& invInertiaB,
|
||||
float maxRambdaDt[4], float minRambdaDt[4])
|
||||
{
|
||||
if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0 ) return;
|
||||
const MYF4& center = cs.m_center;
|
||||
|
||||
MYF4 n = -cs.m_linear;
|
||||
|
||||
MYF4 tangent[2];
|
||||
tangent[0] = cross3( n, cs.m_worldPos[0]-center );
|
||||
tangent[1] = cross3( tangent[0], n );
|
||||
tangent[0] = normalize3( tangent[0] );
|
||||
tangent[1] = normalize3( tangent[1] );
|
||||
|
||||
MYF4 angular0, angular1, linear;
|
||||
MYF4 r0 = center - posA;
|
||||
MYF4 r1 = center - posB;
|
||||
for(int i=0; i<2; i++)
|
||||
{
|
||||
setLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 );
|
||||
float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
|
||||
linVelA, angVelA, linVelB, angVelB );
|
||||
rambdaDt *= cs.m_fJacCoeffInv[i];
|
||||
|
||||
{
|
||||
float prevSum = cs.m_fAppliedRambdaDt[i];
|
||||
float updated = prevSum;
|
||||
updated += rambdaDt;
|
||||
updated = max2( updated, minRambdaDt[i] );
|
||||
updated = min2( updated, maxRambdaDt[i] );
|
||||
rambdaDt = updated - prevSum;
|
||||
cs.m_fAppliedRambdaDt[i] = updated;
|
||||
}
|
||||
|
||||
MYF4 linImp0 = invMassA*linear*rambdaDt;
|
||||
MYF4 linImp1 = invMassB*(-linear)*rambdaDt;
|
||||
MYF4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;
|
||||
MYF4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;
|
||||
|
||||
linVelA += linImp0;
|
||||
angVelA += angImp0;
|
||||
linVelB += linImp1;
|
||||
angVelB += angImp1;
|
||||
}
|
||||
|
||||
{ // angular damping for point constraint
|
||||
MYF4 ab = normalize3( posB - posA );
|
||||
MYF4 ac = normalize3( center - posA );
|
||||
if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
|
||||
{
|
||||
float angNA = dot3F4( n, angVelA );
|
||||
float angNB = dot3F4( n, angVelB );
|
||||
|
||||
angVelA -= (angNA*0.1f)*n;
|
||||
angVelB -= (angNB*0.1f)*n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<bool JACOBI>
|
||||
static
|
||||
__inline
|
||||
void solveContact(Constraint4& cs,
|
||||
const MYF4& posA, MYF4& linVelA, MYF4& angVelA, float invMassA, const Matrix3x3& invInertiaA,
|
||||
const MYF4& posB, MYF4& linVelB, MYF4& angVelB, float invMassB, const Matrix3x3& invInertiaB,
|
||||
float maxRambdaDt[4], float minRambdaDt[4])
|
||||
{
|
||||
MYF4 dLinVelA = MAKE_MYF4(0.f);
|
||||
MYF4 dAngVelA = MAKE_MYF4(0.f);
|
||||
MYF4 dLinVelB = MAKE_MYF4(0.f);
|
||||
MYF4 dAngVelB = MAKE_MYF4(0.f);
|
||||
|
||||
for(int ic=0; ic<4; ic++)
|
||||
{
|
||||
// dont necessary because this makes change to 0
|
||||
if( cs.m_jacCoeffInv[ic] == 0.f ) continue;
|
||||
|
||||
{
|
||||
MYF4 angular0, angular1, linear;
|
||||
MYF4 r0 = cs.m_worldPos[ic] - posA;
|
||||
MYF4 r1 = cs.m_worldPos[ic] - posB;
|
||||
setLinearAndAngular( -cs.m_linear, r0, r1, linear, angular0, angular1 );
|
||||
|
||||
float rambdaDt = calcRelVel(cs.m_linear, -cs.m_linear, angular0, angular1,
|
||||
linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic];
|
||||
rambdaDt *= cs.m_jacCoeffInv[ic];
|
||||
|
||||
{
|
||||
float prevSum = cs.m_appliedRambdaDt[ic];
|
||||
float updated = prevSum;
|
||||
updated += rambdaDt;
|
||||
updated = max2( updated, minRambdaDt[ic] );
|
||||
updated = min2( updated, maxRambdaDt[ic] );
|
||||
rambdaDt = updated - prevSum;
|
||||
cs.m_appliedRambdaDt[ic] = updated;
|
||||
}
|
||||
|
||||
MYF4 linImp0 = invMassA*linear*rambdaDt;
|
||||
MYF4 linImp1 = invMassB*(-linear)*rambdaDt;
|
||||
MYF4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;
|
||||
MYF4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;
|
||||
|
||||
if( JACOBI )
|
||||
{
|
||||
dLinVelA += linImp0;
|
||||
dAngVelA += angImp0;
|
||||
dLinVelB += linImp1;
|
||||
dAngVelB += angImp1;
|
||||
}
|
||||
else
|
||||
{
|
||||
linVelA += linImp0;
|
||||
angVelA += angImp0;
|
||||
linVelB += linImp1;
|
||||
angVelB += angImp1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( JACOBI )
|
||||
{
|
||||
linVelA += dLinVelA;
|
||||
angVelA += dAngVelA;
|
||||
linVelB += dLinVelB;
|
||||
angVelB += dAngVelB;
|
||||
}
|
||||
}
|
||||
|
||||
enum
|
||||
{
|
||||
N_SPLIT = SolverBase::N_SPLIT,
|
||||
};
|
||||
|
||||
// for parallel solve
|
||||
struct ParallelSolveData
|
||||
{
|
||||
u32 m_n[N_SPLIT*N_SPLIT];
|
||||
u32 m_offset[N_SPLIT*N_SPLIT];
|
||||
};
|
||||
|
||||
static
|
||||
__inline
|
||||
int sortConstraintByBatch(Contact4* cs, int n, int ignoreIdx, int simdWidth = -1)
|
||||
{
|
||||
SortData* sortData;
|
||||
{
|
||||
BT_PROFILE("new");
|
||||
sortData = new SortData[n];
|
||||
}
|
||||
|
||||
u32* idxBuffer = new u32[n];
|
||||
u32* idxSrc = idxBuffer;
|
||||
u32* idxDst = idxBuffer;
|
||||
int nIdxSrc, nIdxDst;
|
||||
|
||||
const int N_FLG = 256;
|
||||
const int FLG_MASK = N_FLG-1;
|
||||
u32 flg[N_FLG/32];
|
||||
#if defined(_DEBUG)
|
||||
for(int i=0; i<n; i++) cs[i].getBatchIdx() = -1;
|
||||
#endif
|
||||
for(int i=0; i<n; i++) idxSrc[i] = i;
|
||||
nIdxSrc = n;
|
||||
|
||||
int batchIdx = 0;
|
||||
|
||||
{
|
||||
BT_PROFILE("batching");
|
||||
while( nIdxSrc )
|
||||
{
|
||||
nIdxDst = 0;
|
||||
int nCurrentBatch = 0;
|
||||
|
||||
// clear flag
|
||||
for(int i=0; i<N_FLG/32; i++) flg[i] = 0;
|
||||
|
||||
for(int i=0; i<nIdxSrc; i++)
|
||||
{
|
||||
int idx = idxSrc[i];
|
||||
ADLASSERT( idx < n );
|
||||
// check if it can go
|
||||
int aIdx = cs[idx].m_bodyAPtr & FLG_MASK;
|
||||
int bIdx = cs[idx].m_bodyBPtr & FLG_MASK;
|
||||
|
||||
u32 aUnavailable = flg[ aIdx/32 ] & (1<<(aIdx&31));
|
||||
u32 bUnavailable = flg[ bIdx/32 ] & (1<<(bIdx&31));
|
||||
|
||||
aUnavailable = (ignoreIdx==cs[idx].m_bodyAPtr)? 0:aUnavailable;
|
||||
bUnavailable = (ignoreIdx==cs[idx].m_bodyBPtr)? 0:bUnavailable;
|
||||
|
||||
if( aUnavailable==0 && bUnavailable==0 ) // ok
|
||||
{
|
||||
flg[ aIdx/32 ] |= (1<<(aIdx&31));
|
||||
flg[ bIdx/32 ] |= (1<<(bIdx&31));
|
||||
cs[idx].getBatchIdx() = batchIdx;
|
||||
sortData[idx].m_key = batchIdx;
|
||||
sortData[idx].m_value = idx;
|
||||
|
||||
{
|
||||
nCurrentBatch++;
|
||||
if( nCurrentBatch == simdWidth )
|
||||
{
|
||||
nCurrentBatch = 0;
|
||||
for(int i=0; i<N_FLG/32; i++) flg[i] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
idxDst[nIdxDst++] = idx;
|
||||
}
|
||||
}
|
||||
swap2( idxSrc, idxDst );
|
||||
swap2( nIdxSrc, nIdxDst );
|
||||
batchIdx ++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("radix sort data");
|
||||
// sort SortData
|
||||
Device::Config cfg;
|
||||
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, cfg );
|
||||
{
|
||||
Buffer<SortData> sortBuffer; sortBuffer.setRawPtr( deviceHost, sortData, n );
|
||||
RadixSort<TYPE_HOST>::Data* sort = RadixSort<TYPE_HOST>::allocate( deviceHost, n );
|
||||
|
||||
RadixSort<TYPE_HOST>::execute( sort, sortBuffer, n );
|
||||
|
||||
RadixSort<TYPE_HOST>::deallocate( sort );
|
||||
}
|
||||
DeviceUtils::deallocate( deviceHost );
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("reorder");
|
||||
// reorder
|
||||
Contact4* old = new Contact4[n];
|
||||
memcpy( old, cs, sizeof(Contact4)*n);
|
||||
for(int i=0; i<n; i++)
|
||||
{
|
||||
int idx = sortData[i].m_value;
|
||||
cs[i] = old[idx];
|
||||
}
|
||||
delete [] old;
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("delete");
|
||||
delete [] idxBuffer;
|
||||
delete [] sortData;
|
||||
}
|
||||
#if defined(_DEBUG)
|
||||
// debugPrintf( "nBatches: %d\n", batchIdx );
|
||||
for(int i=0; i<n; i++) ADLASSERT( cs[i].getBatchIdx() != -1 );
|
||||
#endif
|
||||
return batchIdx;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Placeholder: both enumerators below are commented out, so this enum
// currently declares no names.
enum
|
||||
{
|
||||
// N_SPLIT = SOLVER_N_SPLIT,
|
||||
// MAX_TASKS_PER_BATCH = N_SPLIT*N_SPLIT/4,
|
||||
};
|
||||
|
||||
struct SolveTask// : public ThreadPool::Task
|
||||
{
|
||||
SolveTask(const Buffer<RigidBodyBase::Body>* bodies, const Buffer<RigidBodyBase::Inertia>* shapes, const Buffer<Constraint4>* constraints,
|
||||
int start, int nConstraints)
|
||||
: m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ),
|
||||
m_solveFriction( true ){}
|
||||
|
||||
u16 getType(){ return 0; }
|
||||
|
||||
void run(int tIdx)
|
||||
{
|
||||
HostBuffer<RigidBodyBase::Body>& hBody = *(HostBuffer<RigidBodyBase::Body>*)m_bodies;
|
||||
HostBuffer<RigidBodyBase::Inertia>& hShape = *(HostBuffer<RigidBodyBase::Inertia>*)m_shapes;
|
||||
HostBuffer<Constraint4>& hc = *(HostBuffer<Constraint4>*)m_constraints;
|
||||
|
||||
for(int ic=0; ic<m_nConstraints; ic++)
|
||||
{
|
||||
int i = m_start + ic;
|
||||
|
||||
float frictionCoeff = hc[i].getFrictionCoeff();
|
||||
int aIdx = (int)hc[i].m_bodyA;
|
||||
int bIdx = (int)hc[i].m_bodyB;
|
||||
RigidBodyBase::Body& bodyA = hBody[aIdx];
|
||||
RigidBodyBase::Body& bodyB = hBody[bIdx];
|
||||
|
||||
if( !m_solveFriction )
|
||||
{
|
||||
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
||||
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
|
||||
|
||||
SolverInl::solveContact<false>( hc[i], bodyA.m_pos, (MYF4&)bodyA.m_linVel, (MYF4&)bodyA.m_angVel, bodyA.m_invMass, hShape[aIdx].m_invInertia,
|
||||
bodyB.m_pos, (MYF4&)bodyB.m_linVel, (MYF4&)bodyB.m_angVel, bodyB.m_invMass, hShape[bIdx].m_invInertia,
|
||||
maxRambdaDt, minRambdaDt );
|
||||
}
|
||||
else
|
||||
{
|
||||
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
||||
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
|
||||
|
||||
float sum = 0;
|
||||
for(int j=0; j<4; j++)
|
||||
{
|
||||
sum +=hc[i].m_appliedRambdaDt[j];
|
||||
}
|
||||
frictionCoeff = 0.7f;
|
||||
for(int j=0; j<4; j++)
|
||||
{
|
||||
maxRambdaDt[j] = frictionCoeff*sum;
|
||||
minRambdaDt[j] = -maxRambdaDt[j];
|
||||
}
|
||||
|
||||
SolverInl::solveFriction( hc[i], bodyA.m_pos, (MYF4&)bodyA.m_linVel, (MYF4&)bodyA.m_angVel, bodyA.m_invMass, hShape[aIdx].m_invInertia,
|
||||
bodyB.m_pos, (MYF4&)bodyB.m_linVel, (MYF4&)bodyB.m_angVel, bodyB.m_invMass, hShape[bIdx].m_invInertia,
|
||||
maxRambdaDt, minRambdaDt );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const Buffer<RigidBodyBase::Body>* m_bodies;
|
||||
const Buffer<RigidBodyBase::Inertia>* m_shapes;
|
||||
const Buffer<Constraint4>* m_constraints;
|
||||
int m_start;
|
||||
int m_nConstraints;
|
||||
bool m_solveFriction;
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
static Solver<adl::TYPE_HOST>::Data* Solver<adl::TYPE_HOST>::allocate( const Device* device, int pairCapacity )
|
||||
{
|
||||
Solver<adl::TYPE_HOST>::Data* data = new Data;
|
||||
data->m_device = device;
|
||||
data->m_parallelSolveData = 0;
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
template<>
|
||||
static void Solver<adl::TYPE_HOST>::deallocate( Solver<TYPE_HOST>::Data* data )
|
||||
{
|
||||
if( data->m_parallelSolveData ) delete (SolverInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
delete data;
|
||||
}
|
||||
|
||||
|
||||
// When cfg.m_enableParallelSolve is set, sorts the contacts by the grid cell
// of one of their bodies and records, per cell, how many contacts it holds and
// where they start (data->m_parallelSolveData). Otherwise the contacts are
// only mapped and unmapped.
//
// The cell is taken from body B when body A is cfg.m_staticIdx, else from
// body A; x and z of the body position are scaled by
// 1/(N_OBJ_PER_SPLIT*cfg.m_averageExtent) and wrapped into [0, N_SPLIT).
//
// The ParallelSolveData is created on first use and reused on later calls
// (every entry of both arrays is rewritten by the count and scan passes), so
// calling this once per step neither asserts nor leaks.
// additionalData is not used.
void sortContacts2( Solver<TYPE_HOST>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
	Buffer<Contact4>* contactsIn, void* additionalData,
	int nContacts, const Solver<TYPE_HOST>::ConstraintCfg& cfg )
{
	ADLASSERT( data->m_device->m_type == TYPE_HOST );
	HostBuffer<RigidBodyBase::Body>* bodyNative
		= (HostBuffer<RigidBodyBase::Body>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, bodyBuf );
	HostBuffer<Contact4>* contactNative
		= (HostBuffer<Contact4>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, contactsIn);

	if( cfg.m_enableParallelSolve )
	{
		//	allocate once, reuse afterwards
		if( data->m_parallelSolveData == 0 )
			data->m_parallelSolveData = new SolverInl::ParallelSolveData;
		SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData;

		HostBuffer<SortData> sortData( data->m_device, nContacts );
		{	//	2. set cell idx
			float spacing = adl::SolverBase::N_OBJ_PER_SPLIT*cfg.m_averageExtent;
			float xScale = 1.f/spacing;
			for(int i=0; i<nContacts; i++)
			{
				int idx = ((*contactNative)[i].m_bodyAPtr==cfg.m_staticIdx)? (*contactNative)[i].m_bodyBPtr:(*contactNative)[i].m_bodyAPtr;
				float4& p = (*bodyNative)[idx].m_pos;
				//	subtract 1 from negative coordinates before scaling and truncating, then wrap with the mask
				int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*xScale)&(adl::SolverBase::N_SPLIT-1);
				int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*xScale)&(adl::SolverBase::N_SPLIT-1);
				ADLASSERT( xIdx >= 0 && xIdx < adl::SolverBase::N_SPLIT );
				ADLASSERT( zIdx >= 0 && zIdx < adl::SolverBase::N_SPLIT );
				sortData[i].m_key = (xIdx+zIdx*adl::SolverBase::N_SPLIT);
				sortData[i].m_value = i;
			}
		}

		{	//	3. sort by cell idx
			RadixSort<TYPE_HOST>::Data* sData = RadixSort<TYPE_HOST>::allocate( data->m_device, nContacts );

			RadixSort<TYPE_HOST>::execute( sData, sortData, nContacts );

			RadixSort<TYPE_HOST>::deallocate( sData );
		}

		{	//	4. find entries
			//	counts/offsets are views over the arrays inside solveData, so the results land there directly
			HostBuffer<u32> counts; counts.setRawPtr( data->m_device, solveData->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
			HostBuffer<u32> offsets; offsets.setRawPtr( data->m_device, solveData->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
			{
				BoundSearch<TYPE_HOST>::Data* sData = BoundSearch<TYPE_HOST>::allocate( data->m_device );
				PrefixScan<TYPE_HOST>::Data* pData = PrefixScan<TYPE_HOST>::allocate( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );

				BoundSearch<TYPE_HOST>::execute( sData, sortData, nContacts, counts, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT, BoundSearchBase::COUNT );

				PrefixScan<TYPE_HOST>::execute( pData, counts, offsets, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );

				BoundSearch<TYPE_HOST>::deallocate( sData );
				PrefixScan<TYPE_HOST>::deallocate( pData );
			}
#if defined(_DEBUG)
			{
				//	cross-check counts and offsets against a straightforward histogram + exclusive scan
				HostBuffer<u32> n0( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
				HostBuffer<u32> offset0( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
				for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
				{
					n0[i] = 0;
					offset0[i] = 0;
				}

				for(int i=0; i<nContacts; i++)
				{
					int idx = sortData[i].m_key;
					n0[idx]++;
				}

				//	scan
				int sum = 0;
				for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
				{
					offset0[i] = sum;
					sum += n0[i];
				}

				for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
				{
					ADLASSERT( n0[i] == counts[i] );
					ADLASSERT( offset0[i] == offsets[i] );
				}
			}
#endif
		}

		{	//	5. sort constraints by cellIdx
			Contact4* old = new Contact4[nContacts];
			memcpy( old, contactNative->m_ptr, sizeof(Contact4)*nContacts );
			for(int i=0; i<nContacts; i++)
			{
				int srcIdx = sortData[i].m_value;
				(*contactNative)[i] = old[srcIdx];
			}
			delete [] old;
		}
	}

	BufferUtils::unmap<false>( bodyNative, bodyBuf );
	BufferUtils::unmap<true>( contactNative, contactsIn );
}
|
||||
|
||||
// Full contact preparation pipeline: sorts the contacts by grid cell
// (sortContacts2), batches them per cell (Solver::batchContacts) and converts
// them into solver constraints (Solver::convertToConstraints).
//
// Precondition: data->m_parallelSolveData must be non-null after
// sortContacts2, because the per-cell counts and offsets are read from it.
// In the code visible here it is only created when cfg.m_enableParallelSolve
// is set.
static void reorderConvertToConstraints2( Solver<TYPE_HOST>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
	const Buffer<RigidBodyBase::Inertia>* shapeBuf,
	adl::Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
	int nContacts, const Solver<TYPE_HOST>::ConstraintCfg& cfg )
{
	sortContacts2( data, bodyBuf, contactsIn, additionalData, nContacts, cfg );

	{
		SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData;
		ADLASSERT( solveData != 0 );
		//	wrap the per-cell arrays so they can be passed as buffers without copying
		Buffer<u32> n; n.setRawPtr( data->m_device, solveData->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
		Buffer<u32> offsets; offsets.setRawPtr( data->m_device, solveData->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
		Solver<TYPE_HOST>::batchContacts( data, contactsIn, nContacts, &n, &offsets, cfg.m_staticIdx );
	}

	Solver<TYPE_HOST>::convertToConstraints( data, bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg );
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
static void solveContactConstraint( Solver<TYPE_HOST>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
||||
SolverData constraint, void* additionalData, int n )
|
||||
{
|
||||
|
||||
Buffer<RigidBodyBase::Body>* bodyNative
|
||||
= BufferUtils::map<TYPE_HOST, true>( data->m_device, bodyBuf );
|
||||
Buffer<RigidBodyBase::Inertia>* shapeNative
|
||||
= BufferUtils::map<TYPE_HOST, true>( data->m_device, shapeBuf );
|
||||
Buffer<Constraint4>* constraintNative
|
||||
= BufferUtils::map<TYPE_HOST, true>( data->m_device, (const Buffer<Constraint4>*)constraint );
|
||||
|
||||
for(int iter=0; iter<data->m_nIterations; iter++)
|
||||
{
|
||||
SolveTask task( bodyNative, shapeNative, constraintNative, 0, n );
|
||||
task.m_solveFriction = false;
|
||||
task.run(0);
|
||||
}
|
||||
|
||||
for(int iter=0; iter<data->m_nIterations; iter++)
|
||||
{
|
||||
SolveTask task( bodyNative, shapeNative, constraintNative, 0, n );
|
||||
task.m_solveFriction = true;
|
||||
task.run(0);
|
||||
}
|
||||
|
||||
BufferUtils::unmap<true>( bodyNative, bodyBuf );
|
||||
BufferUtils::unmap<false>( shapeNative, shapeBuf );
|
||||
BufferUtils::unmap<false>( constraintNative, (const Buffer<Constraint4>*)constraint );
|
||||
}
|
||||
|
||||
#if 0
// Disabled at preprocessing time. The active body only asserts and returns 0;
// the earlier implementation is kept below as a block comment for reference.
static
int createSolveTasks( int batchIdx, Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* shapeBuf,
	SolverData constraint, int n, ThreadPool::Task* tasksOut[], int taskCapacity )
{
/*
	ADLASSERT( (N_SPLIT&1) == 0 );
	ADLASSERT( batchIdx < N_BATCHES );
	ADLASSERT( data->m_device->m_type == TYPE_HOST );
	ADLASSERT( data->m_parallelSolveData );

	SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData;
	data->m_batchIdx = 0;

	const int nx = N_SPLIT/2;

	int nTasksCreated = 0;

//	for(int ii=0; ii<2; ii++)
	for(batchIdx=0; batchIdx<4; batchIdx++)
	{
		int2 offset = make_int2( batchIdx&1, batchIdx>>1 );
		for(int ix=0; ix<nx; ix++) for(int iy=0; iy<nx; iy++)
		{
			int xIdx = ix*2 + offset.x;
			int yIdx = iy*2 + offset.y;
			int cellIdx = xIdx+yIdx*N_SPLIT;

			int n = solveData->m_n[cellIdx];
			int start = solveData->m_offset[cellIdx];

			if( n == 0 ) continue;

			SolveTask* task = new SolveTask( bodyBuf, shapeBuf, (const Buffer<Constraint4>*)constraint, start, n );
//			task->m_solveFriction = (ii==0)? false:true;
			tasksOut[nTasksCreated++] = task;
		}
	}

	return nTasksCreated;
*/
	ADLASSERT(0);
	return 0;
}
#endif
|
||||
|
||||
|
||||
|
||||
// Builds one Constraint4 per valid contact (same index in contactCOut as in
// contactsIn) and copies the contact's batch index onto it. Invalid contacts
// are skipped, leaving their output slot untouched.
//
// A body whose linear and angular velocity are both zero is treated as
// immovable for this step: it is passed to setConstraint4 with inverse mass 0
// and a zero inverse inertia.
// additionalData is not used. Outside _DEBUG builds the loop carries an
// OpenMP parallel-for pragma.
static void convertToConstraints2( Solver<TYPE_HOST>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
	const Buffer<RigidBodyBase::Inertia>* shapeBuf,
	Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
	int nContacts, const Solver<TYPE_HOST>::ConstraintCfg& cfg )
{
	ADLASSERT( data->m_device->m_type == TYPE_HOST );

	HostBuffer<RigidBodyBase::Body>* bodyNative
		= (HostBuffer<RigidBodyBase::Body>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, bodyBuf );
	HostBuffer<RigidBodyBase::Inertia>* shapeNative
		= (HostBuffer<RigidBodyBase::Inertia>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, shapeBuf );
	HostBuffer<Contact4>* contactNative
		= (HostBuffer<Contact4>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, contactsIn );
	//	output buffer: mapped without copying its current contents in
	HostBuffer<Constraint4>* constraintNative
		= (HostBuffer<Constraint4>*)BufferUtils::map<TYPE_HOST, false>( data->m_device, (Buffer<Constraint4>*)contactCOut );

	HostBuffer<RigidBodyBase::Body>& bodies = *bodyNative;
	HostBuffer<RigidBodyBase::Inertia>& shapes = *shapeNative;
	HostBuffer<Contact4>& contacts = *contactNative;
	HostBuffer<Constraint4>& constraints = *constraintNative;

#if !defined(_DEBUG)
#pragma omp parallel for
#endif
	for(int ci=0; ci<nContacts; ci++)
	{
		Contact4& contact = contacts[ci];

		if( contact.isInvalid() ) continue;

		const int aIdx = (int)contact.m_bodyAPtr;
		const int bIdx = (int)contact.m_bodyBPtr;

		const RigidBodyBase::Body& bodyA = bodies[aIdx];
		const RigidBodyBase::Body& bodyB = bodies[bIdx];

		MYF4 posA( bodyA.m_pos );
		MYF4 linVelA( bodyA.m_linVel );
		MYF4 angVelA( bodyA.m_angVel );
		MYF4 posB( bodyB.m_pos );
		MYF4 linVelB( bodyB.m_linVel );
		MYF4 angVelB( bodyB.m_angVel );

		//	a body that is not moving at all is handled as if it had infinite mass
		const bool aIsInactive = ( isZero( linVelA ) && isZero( angVelA ) );
		const bool bIsInactive = ( isZero( linVelB ) && isZero( angVelB ) );

		SolverInl::setConstraint4( posA, linVelA, angVelA,
			(aIsInactive)? 0.f : bodies[aIdx].m_invMass, (aIsInactive)? mtZero() : shapes[aIdx].m_invInertia,
			posB, linVelB, angVelB,
			(bIsInactive)? 0.f : bodies[bIdx].m_invMass, (bIsInactive)? mtZero() : shapes[bIdx].m_invInertia,
			contact, cfg,
			constraints[ci] );
		constraints[ci].m_batchIdx = contact.getBatchIdx();
	}

	BufferUtils::unmap<false>( bodyNative, bodyBuf );
	BufferUtils::unmap<false>( shapeNative, shapeBuf );
	BufferUtils::unmap<false>( contactNative, contactsIn );
	//	only the constraints are written back
	BufferUtils::unmap<true>( constraintNative, (Buffer<Constraint4>*)contactCOut );
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Batches the contacts of every non-empty grid cell on the host.
//
//   contacts   contacts already grouped by cell; reordered in place per cell
//   n          per-cell contact counts  (N_SPLIT*N_SPLIT entries)
//   offsets    per-cell start offsets into contacts (N_SPLIT*N_SPLIT entries)
//   staticIdx  forwarded to sortConstraintByBatch as the body index to ignore
//
// Prints the largest per-cell batch count to stdout.
static void batchContacts2( Solver<TYPE_HOST>::Data* data, Buffer<Contact4>* contacts, int nContacts, Buffer<u32>* n, Buffer<u32>* offsets, int staticIdx )
{
	ADLASSERT( data->m_device->m_type == TYPE_HOST );

	HostBuffer<Contact4>* contactNative =0;
	HostBuffer<u32>* nNative =0;
	HostBuffer<u32>* offsetsNative =0;

	{
		BT_PROFILE("BufferUtils::map");
		contactNative = (HostBuffer<Contact4>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, contacts, nContacts );
	}
	{
		BT_PROFILE("BufferUtils::map2");
		nNative = (HostBuffer<u32>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, n );
		offsetsNative= (HostBuffer<u32>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, offsets );
	}

	{
		BT_PROFILE("sortConstraintByBatch");
		int maxNumBatches = 0;

		for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
		{
			//	named so it does not hide the `n` buffer parameter
			const int nInCell = (*nNative)[i];
			const int offset = (*offsetsNative)[i];

			if( nInCell )
			{
				int numBatches = SolverInl::sortConstraintByBatch( contactNative->m_ptr+offset, nInCell, staticIdx,-1 );
				maxNumBatches = max(numBatches,maxNumBatches);
			}
		}

		printf("maxNumBatches = %d\n", maxNumBatches);
	}

	{
		BT_PROFILE("BufferUtils::unmap");
		//	contacts were reordered, so they are written back
		BufferUtils::unmap<true>( contactNative, contacts, nContacts );
	}
	{
		BT_PROFILE("BufferUtils::unmap2");
		BufferUtils::unmap<false>( nNative, n );
		BufferUtils::unmap<false>( offsetsNative, offsets );
	}
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,338 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
|
||||
|
||||
#ifdef cl_ext_atomic_counters_32
|
||||
#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
|
||||
#else
|
||||
#define counter32_t volatile __global int*
|
||||
#endif
|
||||
|
||||
|
||||
typedef unsigned int u32;
|
||||
typedef unsigned short u16;
|
||||
typedef unsigned char u8;
|
||||
|
||||
#define GET_GROUP_IDX get_group_id(0)
|
||||
#define GET_LOCAL_IDX get_local_id(0)
|
||||
#define GET_GLOBAL_IDX get_global_id(0)
|
||||
#define GET_GROUP_SIZE get_local_size(0)
|
||||
#define GET_NUM_GROUPS get_num_groups(0)
|
||||
#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
|
||||
#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
|
||||
#define AtomInc(x) atom_inc(&(x))
|
||||
#define AtomInc1(x, out) out = atom_inc(&(x))
|
||||
#define AppendInc(x, out) out = atomic_inc(x)
|
||||
#define AtomAdd(x, value) atom_add(&(x), value)
|
||||
#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
|
||||
#define AtomXhg(x, value) atom_xchg ( &(x), value )
|
||||
|
||||
|
||||
#define SELECT_UINT4( b, a, condition ) select( b,a,condition )
|
||||
|
||||
#define make_float4 (float4)
|
||||
#define make_float2 (float2)
|
||||
#define make_uint4 (uint4)
|
||||
#define make_int4 (int4)
|
||||
#define make_uint2 (uint2)
|
||||
#define make_int2 (int2)
|
||||
|
||||
|
||||
#define max2 max
|
||||
#define min2 min
|
||||
|
||||
|
||||
#define WG_SIZE 64
|
||||
|
||||
|
||||
|
||||
// Contact record as seen by the batching kernel. CreateBatches copies whole
// records from its input to its output array; of the fields it only reads
// m_bodyA / m_bodyB and writes m_batchIdx.
typedef struct
|
||||
{
|
||||
float4 m_worldPos[4];
|
||||
float4 m_worldNormal;
|
||||
u32 m_coeffs;
|
||||
// Batch index; CreateBatches assigns it on the output copy.
int m_batchIdx;
|
||||
|
||||
// Indices of the two bodies in contact.
u32 m_bodyA;
|
||||
u32 m_bodyB;
|
||||
}Contact4;
|
||||
|
||||
// Kernel constants passed by value. CreateBatches reads only m_staticIdx from
// it; its per-group count and start come from the gN / gStart arrays instead.
typedef struct
|
||||
{
|
||||
int m_n;
|
||||
int m_start;
|
||||
int m_staticIdx;
|
||||
int m_paddings[1];
|
||||
} ConstBuffer;
|
||||
|
||||
// Ring-buffer entry used by CreateBatches for a contact that still has to be
// assigned to a batch.
typedef struct
|
||||
{
|
||||
// Smaller of the contact's two body indices.
u32 m_a;
|
||||
// Larger of the contact's two body indices.
u32 m_b;
|
||||
// Index of the contact relative to the work-group's start offset.
u32 m_idx;
|
||||
}Elem;
|
||||
|
||||
#define STACK_SIZE (WG_SIZE*10)
|
||||
//#define STACK_SIZE (WG_SIZE)
|
||||
#define RING_SIZE 1024
|
||||
#define RING_SIZE_MASK (RING_SIZE-1)
|
||||
#define CHECK_SIZE (WG_SIZE)
|
||||
|
||||
|
||||
#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)
|
||||
#define RING_END ldsTmp
|
||||
|
||||
// Tests one bit of a bit set kept in local memory. idx is first wrapped to the
// 32*CHECK_SIZE bits the set holds. Returns the masked word, i.e. non-zero
// when the bit is set and 0 otherwise.
u32 readBuf(__local u32* buff, int idx)
{
	const int wrapped = idx % (32*CHECK_SIZE);
	const int word = wrapped/32;
	const int bit = wrapped%32;
	return buff[word] & (1<<bit);
}
|
||||
|
||||
// Sets one bit of a bit set kept in local memory. idx is first wrapped to the
// 32*CHECK_SIZE bits the set holds. The update goes through atom_or rather
// than a plain read-modify-write of the word.
void writeBuf(__local u32* buff, int idx)
{
	const int wrapped = idx % (32*CHECK_SIZE);
	const int word = wrapped/32;
	const int bit = wrapped%32;
	atom_or( &buff[word], (1<<bit) );
}
|
||||
|
||||
// Atomically sets one bit of a bit set kept in local memory and reports
// whether this call was the one that set it: returns 1 when the bit was clear
// before the call, 0 when it was already set. idx is first wrapped to the
// 32*CHECK_SIZE bits the set holds.
u32 tryWrite(__local u32* buff, int idx)
{
	const int wrapped = idx % (32*CHECK_SIZE);
	const int word = wrapped/32;
	const int bit = wrapped%32;
	// atom_or returns the word as it was before the OR
	const u32 before = (u32)atom_or( &buff[word], (1<<bit) );
	return ((before >> bit)&1) == 0;
}
|
||||
|
||||
// batching on the GPU
|
||||
// batching on the GPU
//
// One work-group batches the gN[group] contacts that start at gStart[group] in
// gConstraints. Contacts are copied to gConstraintsOut (same sub-range,
// filled front to back through an atomic cursor) with m_batchIdx set to the
// outer iteration in which they were accepted.
//
// Per outer iteration (= one batch):
//   1. pending contacts are pulled from global memory into a ring in LDS,
//   2. each ring entry is accepted if neither body is already fixed for this
//      batch (ldsFixedBuffer) and it wins both of its bits in the per-round
//      check set (ldsCheckBuffer); accepted entries go to a stack, the rest
//      are written back to the front of the ring,
//   3. the stack is flushed to the output.
// If an iteration accepts nothing, the whole ring is flushed with
// m_batchIdx = 100 + ring position.
//
// NOTE(review): the outer loop is capped at 250 iterations; contacts still
// pending after that are not written to gConstraintsOut.
// NOTE(review): several LDS counters are read inside the loops without an
// intervening barrier, which appears to rely on the work-group executing in
// lock-step - confirm for devices where WG_SIZE exceeds the hardware SIMD width.
__kernel void CreateBatches( __global Contact4* gConstraints, __global Contact4* gConstraintsOut,
		__global u32* gN, __global u32* gStart,
		ConstBuffer cb )
{
	__local u32 ldsStackIdx[STACK_SIZE];
	__local u32 ldsStackEnd;
	__local Elem ldsRingElem[RING_SIZE];
	__local u32 ldsRingEnd;
	__local u32 ldsTmp;
	__local u32 ldsCheckBuffer[CHECK_SIZE];
	__local u32 ldsFixedBuffer[CHECK_SIZE];
	__local u32 ldsGEnd;
	__local u32 ldsDstEnd;

	int wgIdx = GET_GROUP_IDX;
	int lIdx = GET_LOCAL_IDX;

	const int m_n = gN[wgIdx];
	const int m_start = gStart[wgIdx];
	const int m_staticIdx = cb.m_staticIdx;

	if( lIdx == 0 )
	{
		ldsRingEnd = 0;
		ldsGEnd = 0;
		ldsStackEnd = 0;
		ldsDstEnd = m_start;
	}

	//	the counters above are read by every work-item below; make the
	//	initialisation visible to the whole group first
	GROUP_LDS_BARRIER;

//	while(1)
	for(int ie=0; ie<250; ie++)
	{
		ldsFixedBuffer[lIdx] = 0;

		for(int giter=0; giter<4; giter++)
		{
			int ringCap = GET_RING_CAPACITY;

			//	1. fill ring
			if( ldsGEnd < m_n )
			{
				while( ringCap > WG_SIZE )
				{
					if( ldsGEnd >= m_n ) break;
					if( lIdx < ringCap - WG_SIZE )
					{
						int srcIdx;
						AtomInc1( ldsGEnd, srcIdx );
						if( srcIdx < m_n )
						{
							int dstIdx;
							AtomInc1( ldsRingEnd, dstIdx );

							//	store the body pair ordered (smaller index first)
							int a = gConstraints[m_start+srcIdx].m_bodyA;
							int b = gConstraints[m_start+srcIdx].m_bodyB;
							ldsRingElem[dstIdx].m_a = (a>b)? b:a;
							ldsRingElem[dstIdx].m_b = (a>b)? a:b;
							ldsRingElem[dstIdx].m_idx = srcIdx;
						}
					}
					ringCap = GET_RING_CAPACITY;
				}
			}

			GROUP_LDS_BARRIER;

			//	2. fill stack
			//	rejected entries are compacted back to the front of the same ring;
			//	RING_END (ldsTmp) counts how many were kept
			__local Elem* dst = ldsRingElem;
			if( lIdx == 0 ) RING_END = 0;

			int srcIdx=lIdx;
			int end = ldsRingEnd;

			{
				for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE)
				{
					Elem e;
					if(srcIdx<end) e = ldsRingElem[srcIdx];
					bool done = (srcIdx<end)?false:true;

					//	clear the per-round check set; indexed by the loop variable so the
					//	whole set is cleared even if CHECK_SIZE differs from WG_SIZE
					for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[i] = 0;

					if( !done )
					{
						int aUsed = readBuf( ldsFixedBuffer, e.m_a);
						int bUsed = readBuf( ldsFixedBuffer, e.m_b);

						if( aUsed==0 && bUsed==0 )
						{
							int aAvailable;
							int bAvailable;

							aAvailable = tryWrite( ldsCheckBuffer, e.m_a );
							bAvailable = tryWrite( ldsCheckBuffer, e.m_b );

							//aAvailable = (m_staticIdx == e.m_a)? 1: aAvailable;
							//bAvailable = (m_staticIdx == e.m_b)? 1: bAvailable;

							bool success = (aAvailable && bAvailable);
							if(success)
							{
								writeBuf( ldsFixedBuffer, e.m_a );
								writeBuf( ldsFixedBuffer, e.m_b );
							}
							done = success;
						}
					}

					//	put it aside
					if(srcIdx<end)
					{
						if( done )
						{
							int dstIdx; AtomInc1( ldsStackEnd, dstIdx );
							if( dstIdx < STACK_SIZE )
								ldsStackIdx[dstIdx] = e.m_idx;
							else{
								//	stack is full: undo the reservation and keep the entry pending
								done = false;
								AtomAdd( ldsStackEnd, -1 );
							}
						}
						if( !done )
						{
							int dstIdx; AtomInc1( RING_END, dstIdx );
							dst[dstIdx] = e;
						}
					}

					//	if filled, flush
					if( ldsStackEnd == STACK_SIZE )
					{
						for(int i=lIdx; i<STACK_SIZE; i+=WG_SIZE)
						{
							int idx = m_start + ldsStackIdx[i];
							int dstIdx; AtomInc1( ldsDstEnd, dstIdx );
							gConstraintsOut[ dstIdx ] = gConstraints[ idx ];
							gConstraintsOut[ dstIdx ].m_batchIdx = ie;
						}
						if( lIdx == 0 ) ldsStackEnd = 0;

						//for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) 
						ldsFixedBuffer[lIdx] = 0;
					}
				}
			}

			if( lIdx == 0 ) ldsRingEnd = RING_END;
		}

		GROUP_LDS_BARRIER;

		//	3. flush what was accepted in this batch
		for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE)
		{
			int idx = m_start + ldsStackIdx[i];
			int dstIdx; AtomInc1( ldsDstEnd, dstIdx );
			gConstraintsOut[ dstIdx ] = gConstraints[ idx ];
			gConstraintsOut[ dstIdx ].m_batchIdx = ie;
		}

		//	in case it couldn't consume any pair. Flush them
		//	todo. Serial batch worth while?
		if( ldsStackEnd == 0 )
		{
			for(int i=lIdx; i<ldsRingEnd; i+=WG_SIZE)
			{
				int idx = m_start + ldsRingElem[i].m_idx;
				int dstIdx; AtomInc1( ldsDstEnd, dstIdx );
				gConstraintsOut[ dstIdx ] = gConstraints[ idx ];
				gConstraintsOut[ dstIdx ].m_batchIdx = 100+i;
			}
			GROUP_LDS_BARRIER;
			if( lIdx == 0 ) ldsRingEnd = 0;
		}

		if( lIdx == 0 ) ldsStackEnd = 0;

		GROUP_LDS_BARRIER;

		//	termination
		//	ldsGEnd is advanced atomically by every fetching work-item, including
		//	those that find nothing left, so it can end up past m_n; ">=" still
		//	means "all input fetched"
		if( ldsGEnd >= m_n && ldsRingEnd == 0 )
			break;
	}
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,371 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
static const char* batchingKernelsCL= \
|
||||
"\n"
|
||||
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||
"\n"
|
||||
"#ifdef cl_ext_atomic_counters_32\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"#else\n"
|
||||
"#define counter32_t volatile __global int*\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"typedef unsigned short u16;\n"
|
||||
"typedef unsigned char u8;\n"
|
||||
"\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
"#define GET_GROUP_SIZE get_local_size(0)\n"
|
||||
"#define GET_NUM_GROUPS get_num_groups(0)\n"
|
||||
"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
|
||||
"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"
|
||||
"#define AtomInc(x) atom_inc(&(x))\n"
|
||||
"#define AtomInc1(x, out) out = atom_inc(&(x))\n"
|
||||
"#define AppendInc(x, out) out = atomic_inc(x)\n"
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"#define make_float4 (float4)\n"
|
||||
"#define make_float2 (float2)\n"
|
||||
"#define make_uint4 (uint4)\n"
|
||||
"#define make_int4 (int4)\n"
|
||||
"#define make_uint2 (uint2)\n"
|
||||
"#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define max2 max\n"
|
||||
"#define min2 min\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define WG_SIZE 64\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
// Embedded OpenCL declaration of Contact4, the element type of the gConstraints /
// gConstraintsOut buffers used by the CreateBatches kernel in this string.
// That kernel reads m_bodyA / m_bodyB, copies whole elements, and overwrites
// m_batchIdx on the copy; the other fields are only copied through.
// NOTE(review): the layout presumably has to match a host-side struct -- confirm.
"typedef struct \n"
|
||||
"{\n"
|
||||
" float4 m_worldPos[4];\n"
|
||||
" float4 m_worldNormal;\n"
|
||||
" u32 m_coeffs;\n"
|
||||
" int m_batchIdx;\n"
|
||||
"\n"
|
||||
" u32 m_bodyA;\n"
|
||||
" u32 m_bodyB;\n"
|
||||
"}Contact4;\n"
|
||||
"\n"
|
||||
// Embedded OpenCL declaration of ConstBuffer, passed by value as the `cb`
// argument of CreateBatches. The kernel in this string reads only
// cb.m_staticIdx; its m_n / m_start values are loaded from gN / gStart instead.
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_n;\n"
|
||||
" int m_start;\n"
|
||||
" int m_staticIdx;\n"
|
||||
" int m_paddings[1];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"\n"
|
||||
// Embedded OpenCL declaration of Elem, the entry type of the ldsRingElem array in
// CreateBatches: m_a / m_b receive the smaller / larger of a constraint's two body
// indices, and m_idx the constraint's index relative to the group's m_start.
"typedef struct \n"
|
||||
"{\n"
|
||||
" u32 m_a;\n"
|
||||
" u32 m_b;\n"
|
||||
" u32 m_idx;\n"
|
||||
"}Elem;\n"
|
||||
"\n"
|
||||
"#define STACK_SIZE (WG_SIZE*10)\n"
|
||||
"//#define STACK_SIZE (WG_SIZE)\n"
|
||||
"#define RING_SIZE 1024\n"
|
||||
"#define RING_SIZE_MASK (RING_SIZE-1)\n"
|
||||
"#define CHECK_SIZE (WG_SIZE)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)\n"
|
||||
"#define RING_END ldsTmp\n"
|
||||
"\n"
|
||||
// Embedded OpenCL helper readBuf: tests one bit of a bit array stored as 32-bit
// words in __local memory.
// idx is first wrapped modulo 32*CHECK_SIZE, so indices that differ by a multiple
// of that value map to the same bit.
// Returns the masked word (non-zero when the bit is set), not a normalised 0/1.
"u32 readBuf(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
" int bitIdx = idx%32;\n"
|
||||
" int bufIdx = idx/32;\n"
|
||||
" return buff[bufIdx] & (1<<bitIdx);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
// Embedded OpenCL helper writeBuf: sets the bit selected by idx (wrapped modulo
// 32*CHECK_SIZE, same mapping as readBuf) in a __local bit array.
// The update goes through atom_or; the plain `|=` form is kept only as a
// commented-out line inside the kernel text.
"void writeBuf(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
" int bitIdx = idx%32;\n"
|
||||
" int bufIdx = idx/32;\n"
|
||||
"// buff[bufIdx] |= (1<<bitIdx);\n"
|
||||
" atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
// Embedded OpenCL helper tryWrite: atomically sets the bit selected by idx
// (wrapped modulo 32*CHECK_SIZE) and reports whether this call was the one that
// set it.
// Returns 1 when the bit was clear in the value atom_or handed back (the word's
// previous contents), 0 when it was already set.
"u32 tryWrite(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
" int bitIdx = idx%32;\n"
|
||||
" int bufIdx = idx/32;\n"
|
||||
" u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
|
||||
" return ((ans >> bitIdx)&1) == 0;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
// Embedded OpenCL declaration of SolverDebugInfo: 16 int slots followed by 4 float
// slots. Within this string it is referenced only by a commented-out
// CreateBatches parameter.
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_valInt0;\n"
|
||||
" int m_valInt1;\n"
|
||||
" int m_valInt2;\n"
|
||||
" int m_valInt3;\n"
|
||||
"\n"
|
||||
" int m_valInt4;\n"
|
||||
" int m_valInt5;\n"
|
||||
" int m_valInt6;\n"
|
||||
" int m_valInt7;\n"
|
||||
"\n"
|
||||
" int m_valInt8;\n"
|
||||
" int m_valInt9;\n"
|
||||
" int m_valInt10;\n"
|
||||
" int m_valInt11;\n"
|
||||
" \n"
|
||||
" int m_valInt12;\n"
|
||||
" int m_valInt13;\n"
|
||||
" int m_valInt14;\n"
|
||||
" int m_valInt15;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" float m_fval0;\n"
|
||||
" float m_fval1;\n"
|
||||
" float m_fval2;\n"
|
||||
" float m_fval3;\n"
|
||||
"} SolverDebugInfo;\n"
|
||||
"\n"
|
||||
"// batching on the GPU\n"
|
||||
// Embedded OpenCL kernel CreateBatches.
// Each work-group handles the m_n = gN[group] constraints that start at
// m_start = gStart[group] in gConstraints, copies them into gConstraintsOut (the
// output cursor ldsDstEnd also starts at m_start) and stamps each copy's
// m_batchIdx.
// A constraint is accepted into the current pass only when neither of its body
// indices is marked in ldsFixedBuffer and it wins both bits in ldsCheckBuffer
// (see readBuf / tryWrite / writeBuf earlier in this string).
// cb.m_staticIdx is loaded but used only by commented-out lines.
// NOTE(review): the per-work-item clears `ldsFixedBuffer[lIdx] = 0` cover the whole
// buffer only if the work-group size equals CHECK_SIZE (WG_SIZE) -- confirm
// against the host launch code.
"__kernel void CreateBatches( __global Contact4* gConstraints, __global Contact4* gConstraintsOut, //__global u32* gRes, \n"
|
||||
" __global u32* gN, __global u32* gStart, \n"
|
||||
"// __global SolverDebugInfo* debugInfo, \n"
|
||||
" ConstBuffer cb )\n"
|
||||
"{\n"
|
||||
" __local u32 ldsStackIdx[STACK_SIZE];\n"
|
||||
" __local u32 ldsStackEnd;\n"
|
||||
" __local Elem ldsRingElem[RING_SIZE];\n"
|
||||
" __local u32 ldsRingEnd;\n"
|
||||
" __local u32 ldsTmp;\n"
|
||||
" __local u32 ldsCheckBuffer[CHECK_SIZE];\n"
|
||||
" __local u32 ldsFixedBuffer[CHECK_SIZE];\n"
|
||||
" __local u32 ldsGEnd;\n"
|
||||
" __local u32 ldsDstEnd;\n"
|
||||
"\n"
|
||||
" int wgIdx = GET_GROUP_IDX;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
" \n"
|
||||
" const int m_n = gN[wgIdx];\n"
|
||||
" const int m_start = gStart[wgIdx];\n"
|
||||
" const int m_staticIdx = cb.m_staticIdx;\n"
|
||||
" \n"
|
||||
" if( lIdx == 0 )\n"
|
||||
" {\n"
|
||||
" ldsRingEnd = 0;\n"
|
||||
" ldsGEnd = 0;\n"
|
||||
" ldsStackEnd = 0;\n"
|
||||
" ldsDstEnd = m_start;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
"// while(1)\n"
|
||||
// Outer loop: one pass per batch id `ie`, hard-capped at 250 passes.
// NOTE(review): if the cap is reached, entries still waiting in the ring appear
// never to be written to gConstraintsOut -- confirm this cannot happen in practice.
" for(int ie=0; ie<250; ie++)\n"
|
||||
" {\n"
|
||||
" ldsFixedBuffer[lIdx] = 0;\n"
|
||||
"\n"
|
||||
" for(int giter=0; giter<4; giter++)\n"
|
||||
" {\n"
|
||||
" int ringCap = GET_RING_CAPACITY;\n"
|
||||
" \n"
// Phase 1: work-items claim source constraints through the ldsGEnd counter and
// append (smaller body, larger body, source index) records to ldsRingElem while
// more than WG_SIZE ring slots are free and constraints remain to be fetched.
|
||||
" // 1. fill ring\n"
|
||||
" if( ldsGEnd < m_n )\n"
|
||||
" {\n"
|
||||
" while( ringCap > WG_SIZE )\n"
|
||||
" {\n"
|
||||
" if( ldsGEnd >= m_n ) break;\n"
|
||||
" if( lIdx < ringCap - WG_SIZE )\n"
|
||||
" {\n"
|
||||
" int srcIdx;\n"
|
||||
" AtomInc1( ldsGEnd, srcIdx );\n"
|
||||
" if( srcIdx < m_n )\n"
|
||||
" {\n"
|
||||
" int dstIdx;\n"
|
||||
" AtomInc1( ldsRingEnd, dstIdx );\n"
|
||||
" \n"
|
||||
" int a = gConstraints[m_start+srcIdx].m_bodyA;\n"
|
||||
" int b = gConstraints[m_start+srcIdx].m_bodyB;\n"
|
||||
" ldsRingElem[dstIdx].m_a = (a>b)? b:a;\n"
|
||||
" ldsRingElem[dstIdx].m_b = (a>b)? a:b;\n"
|
||||
" ldsRingElem[dstIdx].m_idx = srcIdx;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" ringCap = GET_RING_CAPACITY;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" \n"
// Phase 2: scan the ring in strides of WG_SIZE. Accepted entries are pushed to
// ldsStackIdx; rejected ones are rewritten to the front of ldsRingElem through
// the RING_END (ldsTmp) counter, which becomes the new ldsRingEnd afterwards.
|
||||
" // 2. fill stack\n"
|
||||
" __local Elem* dst = ldsRingElem;\n"
|
||||
" if( lIdx == 0 ) RING_END = 0;\n"
|
||||
"\n"
|
||||
" int srcIdx=lIdx;\n"
|
||||
" int end = ldsRingEnd;\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE)\n"
|
||||
" {\n"
|
||||
" Elem e;\n"
|
||||
" if(srcIdx<end) e = ldsRingElem[srcIdx];\n"
|
||||
" bool done = (srcIdx<end)?false:true;\n"
|
||||
"\n"
// NOTE(review): loop variable `i` is unused -- the store indexes with lIdx. That
// matches a full clear only while CHECK_SIZE == WG_SIZE.
" for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[lIdx] = 0;\n"
|
||||
" \n"
|
||||
" if( !done )\n"
|
||||
" {\n"
|
||||
" int aUsed = readBuf( ldsFixedBuffer, e.m_a);\n"
|
||||
" int bUsed = readBuf( ldsFixedBuffer, e.m_b);\n"
|
||||
"\n"
|
||||
" if( aUsed==0 && bUsed==0 )\n"
|
||||
" {\n"
|
||||
" int aAvailable;\n"
|
||||
" int bAvailable;\n"
|
||||
"\n"
|
||||
" aAvailable = tryWrite( ldsCheckBuffer, e.m_a );\n"
|
||||
" bAvailable = tryWrite( ldsCheckBuffer, e.m_b );\n"
|
||||
"\n"
|
||||
" //aAvailable = (m_staticIdx == e.m_a)? 1: aAvailable;\n"
|
||||
" //bAvailable = (m_staticIdx == e.m_b)? 1: bAvailable;\n"
|
||||
"\n"
|
||||
" bool success = (aAvailable && bAvailable);\n"
|
||||
" if(success)\n"
|
||||
" {\n"
|
||||
" writeBuf( ldsFixedBuffer, e.m_a );\n"
|
||||
" writeBuf( ldsFixedBuffer, e.m_b );\n"
|
||||
" }\n"
|
||||
" done = success;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // put it aside\n"
|
||||
" if(srcIdx<end)\n"
|
||||
" {\n"
|
||||
" if( done )\n"
|
||||
" {\n"
|
||||
" int dstIdx; AtomInc1( ldsStackEnd, dstIdx );\n"
|
||||
" if( dstIdx < STACK_SIZE )\n"
|
||||
" ldsStackIdx[dstIdx] = e.m_idx;\n"
|
||||
" else{\n"
|
||||
" done = false;\n"
|
||||
" AtomAdd( ldsStackEnd, -1 );\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" if( !done )\n"
|
||||
" {\n"
|
||||
" int dstIdx; AtomInc1( RING_END, dstIdx );\n"
|
||||
" dst[dstIdx] = e;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
// Stack full: write it out under the current batch id, then reset the stack
// counter and this work-item's word of the body marks.
// NOTE(review): the marks are cleared while `ie` stays the same, so later entries
// stamped with this `ie` are not checked against the flushed ones -- confirm this
// is intended.
" // if filled, flush\n"
|
||||
" if( ldsStackEnd == STACK_SIZE )\n"
|
||||
" {\n"
|
||||
" for(int i=lIdx; i<STACK_SIZE; i+=WG_SIZE)\n"
|
||||
" {\n"
|
||||
" int idx = m_start + ldsStackIdx[i];\n"
|
||||
" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n"
|
||||
" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n"
|
||||
" gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n"
|
||||
" }\n"
|
||||
" if( lIdx == 0 ) ldsStackEnd = 0;\n"
|
||||
"\n"
|
||||
" //for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) \n"
|
||||
" ldsFixedBuffer[lIdx] = 0;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if( lIdx == 0 ) ldsRingEnd = RING_END;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
// Write the entries accepted in this pass with m_batchIdx = ie.
" for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE)\n"
|
||||
" {\n"
|
||||
" int idx = m_start + ldsStackIdx[i];\n"
|
||||
" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n"
|
||||
" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n"
|
||||
" gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n"
|
||||
" }\n"
|
||||
"\n"
// Fallback when the stack is empty at this point: every ring entry is written out
// with its own id 100+i (i is its ring slot), and the ring is emptied.
" // in case it couldn't consume any pair. Flush them\n"
|
||||
" // todo. Serial batch worth while?\n"
|
||||
" if( ldsStackEnd == 0 )\n"
|
||||
" {\n"
|
||||
" for(int i=lIdx; i<ldsRingEnd; i+=WG_SIZE)\n"
|
||||
" {\n"
|
||||
" int idx = m_start + ldsRingElem[i].m_idx;\n"
|
||||
" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n"
|
||||
" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n"
|
||||
" gConstraintsOut[ dstIdx ].m_batchIdx = 100+i;\n"
|
||||
" }\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" if( lIdx == 0 ) ldsRingEnd = 0;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if( lIdx == 0 ) ldsStackEnd = 0;\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
// Stop once every source constraint has been fetched and the ring is empty.
" // termination\n"
|
||||
" if( ldsGEnd == m_n && ldsRingEnd == 0 )\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
;
|
||||
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env python
r"""Convert a text file (e.g. an OpenCL kernel) into a C string definition.

Usage: stringify.py <input-file> <variable-name>

Writes to stdout a definition of the form

    static const char* <variable-name>= \
    "<line 1>\n"
    "<line 2>\n"
    ;

Backslashes and double quotes in the input are escaped, so macro
continuation lines and quoted text survive as valid C string content.
"""
import sys
import os
import shutil


def escape_c_line(text):
    """Return `text` with backslashes and double quotes escaped for a C literal."""
    # Backslashes first, otherwise the ones added for the quotes get doubled.
    return text.replace('\\', '\\\\').replace('"', '\\"')


def stringify_lines(lines, name):
    """Build the C definition of `name` from an iterable of source lines.

    Each input line loses its trailing CR/LF, is escaped, and is emitted as one
    quoted fragment ending in an explicit \\n, exactly one fragment per line.
    """
    out = ['static const char* ' + name + '= \\\n']
    for line in lines:
        out.append('"' + escape_c_line(line.rstrip('\r\n')) + '\\n"\n')
    out.append(';\n')
    return ''.join(out)


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 3:
        sys.stderr.write('usage: %s <input-file> <variable-name>\n' % argv[0])
        return 2
    # The with-block closes the input even if writing to stdout fails.
    with open(argv[1]) as fh:
        # sys.stdout.write works on both Python 2 and Python 3.
        sys.stdout.write(stringify_lines(fh, argv[2]))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,6 @@
|
||||
:: Regenerates ChNarrowphaseKernels.h: runs stringify.py on ChNarrowphaseKernels.cl
:: and redirects its output, naming the generated C string narrowphaseKernelsCL.
:: The command launches stringify.py directly, without naming an interpreter.
stringify.py ChNarrowphaseKernels.cl narrowphaseKernelsCL >ChNarrowphaseKernels.h
|
||||
|
||||
|
||||
:: Reminder that the generated header may still need manual fix-ups.
@echo Warning:
|
||||
@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content
|
||||
:: Wait for a key press so the messages stay visible.
pause
|
||||
@@ -0,0 +1,10 @@
|
||||
:: Regenerates all three kernel headers by running stringify.py on each .cl file
:: and redirecting its output to the matching .h file. The second argument is the
:: name of the generated C string variable.
:: The commands launch stringify.py directly, without naming an interpreter.
stringify.py ChNarrowphaseKernels.cl narrowphaseKernelsCL >ChNarrowphaseKernels.h
|
||||
stringify.py SolverKernels.cl solverKernelsCL >SolverKernels.h
|
||||
stringify.py batchingKernels.cl batchingKernelsCL >batchingKernels.h
|
||||
|
||||
|
||||
|
||||
|
||||
:: Reminder that the generated headers may still need manual fix-ups.
@echo Warning:
|
||||
@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content
|
||||
:: Wait for a key press so the messages stay visible.
pause
|
||||
@@ -0,0 +1,8 @@
|
||||
:: Regenerates batchingKernels.h: runs stringify.py on batchingKernels.cl and
:: redirects its output, naming the generated C string batchingKernelsCL.
:: The command launches stringify.py directly, without naming an interpreter.
stringify.py batchingKernels.cl batchingKernelsCL >batchingKernels.h
|
||||
|
||||
|
||||
|
||||
|
||||
:: Reminder that the generated header may still need manual fix-ups.
@echo Warning:
|
||||
@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content
|
||||
:: Wait for a key press so the messages stay visible.
pause
|
||||
@@ -0,0 +1,8 @@
|
||||
:: Regenerates ChNarrowphaseKernels.h: runs stringify.py on ChNarrowphaseKernels.cl
:: and redirects its output, naming the generated C string narrowphaseKernelsCL.
:: The command launches stringify.py directly, without naming an interpreter.
stringify.py ChNarrowphaseKernels.cl narrowphaseKernelsCL >ChNarrowphaseKernels.h
|
||||
|
||||
|
||||
|
||||
|
||||
:: Reminder that the generated header may still need manual fix-ups.
@echo Warning:
|
||||
@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content
|
||||
:: Wait for a key press so the messages stay visible.
pause
|
||||
@@ -0,0 +1,8 @@
|
||||
:: Regenerates SolverKernels.h: runs stringify.py on SolverKernels.cl and
:: redirects its output, naming the generated C string solverKernelsCL.
:: The command launches stringify.py directly, without naming an interpreter.
stringify.py SolverKernels.cl solverKernelsCL >SolverKernels.h
|
||||
|
||||
|
||||
|
||||
|
||||
:: Reminder that the generated header may still need manual fix-ups.
@echo Warning:
|
||||
@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content
|
||||
:: Wait for a key press so the messages stay visible.
pause
|
||||
Reference in New Issue
Block a user