Joint (non-contact constraint) solver iterations are now working on the GPU, but overall performance is slower because of the data copies between CPU and GPU.
Joint setup will be moved to the GPU next, after which some benefit should be visible. Don't use 64-byte alignment: it causes a data-structure size mismatch between the CPU and the GPU.
This commit is contained in:
@@ -51,6 +51,7 @@ struct b3SimdScalar
|
||||
{
|
||||
__m128 m_vec128;
|
||||
float m_floats[4];
|
||||
float x,y,z,w;
|
||||
int m_ints[4];
|
||||
b3Scalar m_unusedPadding;
|
||||
};
|
||||
@@ -105,7 +106,7 @@ operator+(const b3SimdScalar& v1, const b3SimdScalar& v2)
|
||||
#endif
|
||||
|
||||
///The b3SolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance.
|
||||
B3_ATTRIBUTE_ALIGNED64 (struct) b3SolverBody
|
||||
B3_ATTRIBUTE_ALIGNED16 (struct) b3SolverBody
|
||||
{
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
b3Transform m_worldTransform;
|
||||
@@ -125,6 +126,8 @@ B3_ATTRIBUTE_ALIGNED64 (struct) b3SolverBody
|
||||
int m_originalBodyIndex;
|
||||
};
|
||||
|
||||
int padding[3];
|
||||
|
||||
|
||||
void setWorldTransform(const b3Transform& worldTransform)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user