Joint (non-contact constraint) solver iterations are now working on the GPU, but overall it is slower because of the data copies.

Will move joint setup to the GPU, and then some benefit should be visible. Don't use 64-byte alignment; it causes a data-structure size mismatch between the CPU and the GPU.
2013-07-06 14:11:32 -07:00
parent 3ca4d68f3e
commit b8d5cecfe3
5 changed files with 96 additions and 35 deletions
--- a/src/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h
+++ b/src/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h
@@ -51,6 +51,7 @@ struct	b3SimdScalar
 	{
 		__m128		m_vec128;
 		float		m_floats[4];
+		float		x,y,z,w;
 		int			m_ints[4];
 		b3Scalar	m_unusedPadding;
 	};
@@ -105,7 +106,7 @@ operator+(const b3SimdScalar& v1, const b3SimdScalar& v2)
 #endif

 ///The b3SolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance.
-B3_ATTRIBUTE_ALIGNED64 (struct)	b3SolverBody
+B3_ATTRIBUTE_ALIGNED16 (struct)	b3SolverBody
 {
 	B3_DECLARE_ALIGNED_ALLOCATOR();
 	b3Transform		m_worldTransform;
@@ -125,6 +126,8 @@ B3_ATTRIBUTE_ALIGNED64 (struct)	b3SolverBody
 		int		m_originalBodyIndex;
 	};

+	int padding[3];
+

 	void	setWorldTransform(const b3Transform& worldTransform)
 	{