Code-style consistency improvement:

Apply clang-format-all.sh, using the _clang-format file, to all the .cpp/.h files.
Make sure not to apply it to certain serialization structures, since some parsers expect the * to be part of the name instead of the type.
This commit contains no other changes aside from adding and applying clang-format-all.sh.
This commit is contained in:
erwincoumans
2018-09-23 14:17:31 -07:00
parent b73b05e9fb
commit ab8f16961e
1773 changed files with 1081087 additions and 474249 deletions

View File

@@ -42,7 +42,10 @@
#include "Test_quat_aos_neon.h"
#include "LinearMath/btScalar.h"
// ENTRY(name, func) expands to the brace initializer { name, func }; it is used
// for the rows of gTestList. The two definitions below are the same macro in its
// pre- and post-formatting spelling and expand to the same tokens.
#define ENTRY( _name, _func ) { _name, _func }
#define ENTRY(_name, _func) \
{ \
_name, _func \
}
//
// Test functions have the form int (*TestFunc)( void )
@@ -50,48 +53,46 @@
//
// Please see handy stuff in Utils.h, vector.h when writing your test code.
//
// gTestList: table of { name, test function } rows built with ENTRY and closed by
// a { NULL, NULL } terminator row. The full table is only compiled when
// BT_USE_NEON or BT_USE_SSE_IN_API is defined; otherwise the table holds just
// the terminator.
#if defined (BT_USE_NEON) || defined (BT_USE_SSE_IN_API)
#if defined(BT_USE_NEON) || defined(BT_USE_SSE_IN_API)
TestDesc gTestList[] =
{
ENTRY( "maxdot", Test_maxdot ),
ENTRY( "mindot", Test_mindot ),
TestDesc gTestList[] =
{
ENTRY("maxdot", Test_maxdot),
ENTRY("mindot", Test_mindot),
ENTRY( "qtmul", Test_qtmul ),
ENTRY( "qtmulQV3", Test_qtmulQV3 ),
ENTRY( "qtmulV3Q", Test_qtmulV3Q ),
ENTRY( "qtdot", Test_qtdot ),
ENTRY( "qtnorm", Test_qtnorm ),
ENTRY("qtmul", Test_qtmul),
ENTRY("qtmulQV3", Test_qtmulQV3),
ENTRY("qtmulV3Q", Test_qtmulV3Q),
ENTRY("qtdot", Test_qtdot),
ENTRY("qtnorm", Test_qtnorm),
ENTRY( "v3dot", Test_v3dot ),
ENTRY( "v3sdiv", Test_v3sdiv ),
ENTRY( "v3norm", Test_v3norm ),
ENTRY( "v3cross", Test_v3cross ),
ENTRY( "v3triple", Test_v3triple ),
ENTRY( "v3interp", Test_v3interp ),
ENTRY( "v3lerp", Test_v3lerp ),
ENTRY( "v3skew", Test_v3skew ),
ENTRY( "v3div", Test_v3div ),
ENTRY( "v3rotate", Test_v3rotate ),
ENTRY("v3dot", Test_v3dot),
ENTRY("v3sdiv", Test_v3sdiv),
ENTRY("v3norm", Test_v3norm),
ENTRY("v3cross", Test_v3cross),
ENTRY("v3triple", Test_v3triple),
ENTRY("v3interp", Test_v3interp),
ENTRY("v3lerp", Test_v3lerp),
ENTRY("v3skew", Test_v3skew),
ENTRY("v3div", Test_v3div),
ENTRY("v3rotate", Test_v3rotate),
ENTRY( "dot3", Test_dot3 ),
ENTRY( "3x3transpose", Test_3x3transpose ),
ENTRY( "3x3transposeTimes", Test_3x3transposeTimes ),
ENTRY( "3x3timesTranspose", Test_3x3timesTranspose ),
ENTRY( "3x3mulM", Test_3x3mulM ),
ENTRY( "3x3mulM1M2", Test_3x3mulM1M2 ),
ENTRY( "3x3mulMV", Test_3x3mulMV ),
// NOTE(review): the "3x3mulVM" row is bound to Test_3x3mulMV, the same function as
// the "3x3mulMV" row above - presumably Test_3x3mulVM was intended; confirm.
ENTRY( "3x3mulVM", Test_3x3mulMV ),
ENTRY( "3x3setRot", Test_3x3setRot ),
ENTRY( "3x3getRot", Test_3x3getRot ),
ENTRY( "btDbvt", Test_btDbvt ),
ENTRY("quat_aos_neon", Test_quat_aos_neon),
{ NULL, NULL }
};
ENTRY("dot3", Test_dot3),
ENTRY("3x3transpose", Test_3x3transpose),
ENTRY("3x3transposeTimes", Test_3x3transposeTimes),
ENTRY("3x3timesTranspose", Test_3x3timesTranspose),
ENTRY("3x3mulM", Test_3x3mulM),
ENTRY("3x3mulM1M2", Test_3x3mulM1M2),
ENTRY("3x3mulMV", Test_3x3mulMV),
// NOTE(review): the "3x3mulVM" row is bound to Test_3x3mulMV, the same function as
// the "3x3mulMV" row above - presumably Test_3x3mulVM was intended; confirm.
ENTRY("3x3mulVM", Test_3x3mulMV),
ENTRY("3x3setRot", Test_3x3setRot),
ENTRY("3x3getRot", Test_3x3getRot),
ENTRY("btDbvt", Test_btDbvt),
ENTRY("quat_aos_neon", Test_quat_aos_neon),
{NULL, NULL}};
#else
TestDesc gTestList[]={{NULL,NULL}};
TestDesc gTestList[] = {{NULL, NULL}};
#endif

View File

@@ -9,18 +9,18 @@
#define BulletTest_TestList_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
// TestDesc pairs a name string with a pointer to a test function taking no
// arguments and returning int (0 for success, non-zero for failure).
typedef struct TestDesc
{
const char *name;
int (*test_func)(void); // return 0 for success, non-zero for failure
}TestDesc;
typedef struct TestDesc
{
const char *name;
int (*test_func)(void); // return 0 for success, non-zero for failure
} TestDesc;
// Array of TestDesc rows defined in another translation unit; the bound is not
// part of this declaration.
extern TestDesc gTestList[];
extern TestDesc gTestList[];
#ifdef __cplusplus
}
#endif

View File

@@ -5,9 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_3x3getRot.h"
#include "vector.h"
@@ -23,136 +22,136 @@
// Builds a btSimdFloat4 via btAssign128 from three RANDF_m1p1 values followed by
// BT_NAN as the fourth argument.
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN ); // w channel NaN
return btAssign128(RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN); // w channel NaN
}
// Builds a btSimdFloat4 with BT_NAN in all four arguments; Test_3x3getRot uses it
// to pre-fill both output quaternions before they are computed.
static inline btSimdFloat4 qtNAN_f4(void)
{
return btAssign128( BT_NAN, BT_NAN, BT_NAN, BT_NAN );
return btAssign128(BT_NAN, BT_NAN, BT_NAN, BT_NAN);
}
// Reference matrix-to-quaternion conversion used as the "correct" result in
// Test_3x3getRot.
//   m - input matrix; its three rows are copied into m_el.
//   q - output; receives temp[0..3] through q.setValue.
// When the trace is positive, temp[3] is derived from sqrt(trace + 1) and the
// other three entries from differences of off-diagonal elements. Otherwise the
// index i of the largest diagonal element is chosen and the entries are built
// around it (j and k are the other two indices, in cyclic order).
static void M3x3getRot_ref( const btMatrix3x3 &m, btQuaternion &q )
static void M3x3getRot_ref(const btMatrix3x3 &m, btQuaternion &q)
{
btVector3 m_el[3] = { m[0], m[1], m[2] };
btVector3 m_el[3] = {m[0], m[1], m[2]};
btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
btScalar temp[4];
btScalar temp[4];
if (trace > btScalar(0.0))
{
btScalar s = btSqrt(trace + btScalar(1.0));
temp[3]=(s * btScalar(0.5));
s = btScalar(0.5) / s;
if (trace > btScalar(0.0))
{
btScalar s = btSqrt(trace + btScalar(1.0));
temp[3] = (s * btScalar(0.5));
s = btScalar(0.5) / s;
temp[0]=((m_el[2].y() - m_el[1].z()) * s);
temp[1]=((m_el[0].z() - m_el[2].x()) * s);
temp[2]=((m_el[1].x() - m_el[0].y()) * s);
}
else
{
// i = index of the largest of the three diagonal elements.
int i = m_el[0].x() < m_el[1].y() ?
(m_el[1].y() < m_el[2].z() ? 2 : 1) :
(m_el[0].x() < m_el[2].z() ? 2 : 0);
int j = (i + 1) % 3;
int k = (i + 2) % 3;
temp[0] = ((m_el[2].y() - m_el[1].z()) * s);
temp[1] = ((m_el[0].z() - m_el[2].x()) * s);
temp[2] = ((m_el[1].x() - m_el[0].y()) * s);
}
else
{
// i = index of the largest of the three diagonal elements.
int i = m_el[0].x() < m_el[1].y() ? (m_el[1].y() < m_el[2].z() ? 2 : 1) : (m_el[0].x() < m_el[2].z() ? 2 : 0);
int j = (i + 1) % 3;
int k = (i + 2) % 3;
btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
temp[i] = s * btScalar(0.5);
s = btScalar(0.5) / s;
btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
temp[i] = s * btScalar(0.5);
s = btScalar(0.5) / s;
temp[3] = (m_el[k][j] - m_el[j][k]) * s;
temp[j] = (m_el[j][i] + m_el[i][j]) * s;
temp[k] = (m_el[k][i] + m_el[i][k]) * s;
}
q.setValue(temp[0],temp[1],temp[2],temp[3]);
temp[3] = (m_el[k][j] - m_el[j][k]) * s;
temp[j] = (m_el[j][i] + m_el[i][j]) * s;
temp[k] = (m_el[k][i] + m_el[i][k]) * s;
}
q.setValue(temp[0], temp[1], temp[2], temp[3]);
}
// Approximate inequality for quaternions: returns 1 when the sum of the absolute
// differences of x, y, z and w exceeds FLT_EPSILON * 4, otherwise 0.
// NOTE(review): if any component is NaN the sum is presumably NaN and the `>`
// comparison is false, so such a pair would be reported as equal - confirm this
// is acceptable for the test.
static int operator!= ( const btQuaternion &a, const btQuaternion &b )
static int operator!=(const btQuaternion &a, const btQuaternion &b)
{
if( fabs(a.x() - b.x()) +
fabs(a.y() - b.y()) +
fabs(a.z() - b.z()) +
fabs(a.w() - b.w()) > FLT_EPSILON * 4)
return 1;
return 0;
if (fabs(a.x() - b.x()) +
fabs(a.y() - b.y()) +
fabs(a.z() - b.z()) +
fabs(a.w() - b.w()) >
FLT_EPSILON * 4)
return 1;
return 0;
}
// Test for btMatrix3x3::getRotation.
// Phase 1: for each of ARRAY_SIZE matrices built from rand_f4 rows, computes the
// quaternion with M3x3getRot_ref and with getRotation, and returns -1 (after
// logging both results) on the first pair that the local operator!= rejects.
// Phase 2: times both implementations over LOOPCOUNT passes and logs either the
// best pass or the average pass (selected by gReportAverageTimes), divided by
// ARRAY_SIZE. Returns 0 when all comparisons passed.
// NOTE(review): the arrays are ordinary locals; nothing in this function sets up
// the guard pages mentioned in the comment below.
// NOTE(review): `i` is size_t but is logged with %ld; if vlog is printf-style the
// matching conversion would be %zu - confirm.
// NOTE(review): the first `bestTime = ~(bestTime & 0)` reads bestTime before any
// value has been assigned to it; the intent (per the trailing comment) is -1ULL.
int Test_3x3getRot(void)
{
// Init an array flanked by guard pages
btMatrix3x3 in1[ARRAY_SIZE];
btQuaternion out[ARRAY_SIZE];
btQuaternion out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for( i = 0; i < ARRAY_SIZE; i++ )
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
out[i] = btQuaternion(qtNAN_f4());
out2[i] = btQuaternion(qtNAN_f4());
M3x3getRot_ref(in1[i], out[i]);
in1[i].getRotation(out2[i]);
// Init an array flanked by guard pages
btMatrix3x3 in1[ARRAY_SIZE];
btQuaternion out[ARRAY_SIZE];
btQuaternion out2[ARRAY_SIZE];
if( out[i] != out2[i] )
{
vlog( "Error - M3x3getRot result error! ");
vlog( "failure @ %ld\n", i);
vlog( "\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
"\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) \n",
out[i].x(), out[i].y(), out[i].z(), out[i].w(),
out2[i].x(), out2[i].y(), out2[i].z(), out2[i].w());
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = ~(bestTime&0);//-1ULL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
M3x3getRot_ref(in1[i], out[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = ~(bestTime&0);//-1ULL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
{
in1[i].getRotation(out2[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog( "Timing:\n" );
vlog( "\t scalar\t vector\n" );
vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
return 0;
// Init the data
size_t i, j;
for (i = 0; i < ARRAY_SIZE; i++)
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
out[i] = btQuaternion(qtNAN_f4());
out2[i] = btQuaternion(qtNAN_f4());
M3x3getRot_ref(in1[i], out[i]);
in1[i].getRotation(out2[i]);
if (out[i] != out2[i])
{
vlog("Error - M3x3getRot result error! ");
vlog("failure @ %ld\n", i);
vlog(
"\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
"\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) \n",
out[i].x(), out[i].y(), out[i].z(), out[i].w(),
out2[i].x(), out2[i].y(), out2[i].z(), out2[i].w());
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = ~(bestTime & 0); //-1ULL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
M3x3getRot_ref(in1[i], out[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = ~(bestTime & 0); //-1ULL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
{
in1[i].getRotation(out2[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog("Timing:\n");
vlog("\t scalar\t vector\n");
vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE);
return 0;
}
#endif//BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_3x3getRot_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
// Runs the getRotation test; the definition returns 0 on success and -1 on the
// first mismatch.
int Test_3x3getRot(void);
int Test_3x3getRot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,9 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_3x3mulM.h"
#include "vector.h"
@@ -23,147 +22,148 @@
// Builds a btSimdFloat4 via btAssign128 from three RANDF_01 values followed by
// BT_NAN as the fourth argument.
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
return btAssign128(RANDF_01, RANDF_01, RANDF_01, BT_NAN); // w channel NaN
}
// Reference for the in-place product tested by Test_3x3mulM.
// Copies the rows of `in` into m_el, then overwrites `in` so that row r becomes
// (m.tdotx(m_el[r]), m.tdoty(m_el[r]), m.tdotz(m_el[r])), and returns a copy of
// the updated `in`. Note that `in` is modified by the call.
static btMatrix3x3 M3x3mulM_ref( btMatrix3x3 &in, const btMatrix3x3 &m )
static btMatrix3x3 M3x3mulM_ref(btMatrix3x3 &in, const btMatrix3x3 &m)
{
btVector3 m_el[3] = { in[0], in[1], in[2] };
btVector3 m_el[3] = {in[0], in[1], in[2]};
in.setValue(
m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
return in;
return in;
}
// Returns true when each of the four m_floats entries of `ref` and `other`
// differs by at most SIMD_EPSILON.
// NOTE(review): entry 3 (w) is part of the comparison; presumably a NaN there
// makes the result false - confirm the w entry of compared rows is always defined.
static SIMD_FORCE_INLINE bool fuzzyEqualSlow(const btVector3& ref, const btVector3& other)
static SIMD_FORCE_INLINE bool fuzzyEqualSlow(const btVector3 &ref, const btVector3 &other)
{
const btScalar epsilon = SIMD_EPSILON;
return ((btFabs(ref.m_floats[3]-other.m_floats[3])<=epsilon) &&
(btFabs(ref.m_floats[2]-other.m_floats[2])<=epsilon) &&
(btFabs(ref.m_floats[1]-other.m_floats[1])<=epsilon) &&
(btFabs(ref.m_floats[0]-other.m_floats[0])<=epsilon));
return ((btFabs(ref.m_floats[3] - other.m_floats[3]) <= epsilon) &&
(btFabs(ref.m_floats[2] - other.m_floats[2]) <= epsilon) &&
(btFabs(ref.m_floats[1] - other.m_floats[1]) <= epsilon) &&
(btFabs(ref.m_floats[0] - other.m_floats[0]) <= epsilon));
}
// Approximate inequality for matrices: returns 1 as soon as a pair of rows both
// compares unequal with the row operator!= and fails fuzzyEqualSlow; returns 0
// when every row pair is either equal or within the fuzzy tolerance.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
static int operator!=(const btMatrix3x3 &a, const btMatrix3x3 &b)
{
if( a.getRow(0) != b.getRow(0) )
if (a.getRow(0) != b.getRow(0))
{
if (!fuzzyEqualSlow(a.getRow(0),b.getRow(0)))
if (!fuzzyEqualSlow(a.getRow(0), b.getRow(0)))
{
return 1;
}
}
if( a.getRow(1) != b.getRow(1) )
if (a.getRow(1) != b.getRow(1))
{
if( !fuzzyEqualSlow(a.getRow(1),b.getRow(1)) )
return 1;
if (!fuzzyEqualSlow(a.getRow(1), b.getRow(1)))
return 1;
}
if( a.getRow(2) != b.getRow(2) )
if (a.getRow(2) != b.getRow(2))
{
if( !fuzzyEqualSlow(a.getRow(2),b.getRow(2)) )
if (!fuzzyEqualSlow(a.getRow(2), b.getRow(2)))
{
return 1;
}
}
return 0;
return 0;
}
// Test for btMatrix3x3 operator*=.
// Phase 1: for each of ARRAY_SIZE random matrix pairs, compares
// M3x3mulM_ref(in1[i], in2[i]) against (in3[i] *= in2[i]), where in3[i] starts as
// a copy of in1[i]; logs both results and returns -1 on the first mismatch.
// Phase 2: times both paths over LOOPCOUNT passes and logs the best or average
// pass (selected by gReportAverageTimes) divided by ARRAY_SIZE. Returns 0 on
// success.
// NOTE(review): the arrays are ordinary locals; nothing in this function sets up
// the guard pages mentioned in the comment below.
// NOTE(review): `i` is size_t but is logged with %ld; if vlog is printf-style the
// matching conversion would be %zu - confirm.
// NOTE(review): `bestTime = -1LL` relies on conversion of -1 to uint64_t to obtain
// the maximum value as the starting "best" time.
// NOTE(review): M3x3mulM_ref writes back into in1[i] and *= presumably writes back
// into in3[i], so the timing loops keep re-multiplying already multiplied inputs;
// verify the values stay finite over LOOPCOUNT passes.
int Test_3x3mulM(void)
{
// Init an array flanked by guard pages
btMatrix3x3 in1[ARRAY_SIZE];
btMatrix3x3 in2[ARRAY_SIZE];
btMatrix3x3 in3[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for( i = 0; i < ARRAY_SIZE; i++ )
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
in3[i] = in1[i];
out[i] = M3x3mulM_ref(in1[i], in2[i]);
out2[i] = (in3[i] *= in2[i]);
if( out[i] != out2[i] )
{
vlog( "Error - M3x3mulM result error! ");
vlog( "failure @ %ld\n", i);
btVector3 m0, m1, m2;
m0 = out[i].getRow(0);
m1 = out[i].getRow(1);
m2 = out[i].getRow(2);
vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
// Init an array flanked by guard pages
btMatrix3x3 in1[ARRAY_SIZE];
btMatrix3x3 in2[ARRAY_SIZE];
btMatrix3x3 in3[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
m0 = out2[i].getRow(0);
m1 = out2[i].getRow(1);
m2 = out2[i].getRow(2);
vlog( "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
// Init the data
size_t i, j;
for (i = 0; i < ARRAY_SIZE; i++)
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
in3[i] = in1[i];
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = M3x3mulM_ref(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out2[i] = (in3[i] *= in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog( "Timing:\n" );
vlog( "\t scalar\t vector\n" );
vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
return 0;
out[i] = M3x3mulM_ref(in1[i], in2[i]);
out2[i] = (in3[i] *= in2[i]);
if (out[i] != out2[i])
{
vlog("Error - M3x3mulM result error! ");
vlog("failure @ %ld\n", i);
btVector3 m0, m1, m2;
m0 = out[i].getRow(0);
m1 = out[i].getRow(1);
m2 = out[i].getRow(2);
vlog(
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
m0 = out2[i].getRow(0);
m1 = out2[i].getRow(1);
m2 = out2[i].getRow(2);
vlog(
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = M3x3mulM_ref(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out2[i] = (in3[i] *= in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog("Timing:\n");
vlog("\t scalar\t vector\n");
vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE);
return 0;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_3x3mulM_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
// Runs the matrix *= test; the definition returns 0 on success and -1 on the
// first mismatch.
int Test_3x3mulM(void);
int Test_3x3mulM(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_3x3mulM1M2.h"
#include "vector.h"
@@ -24,141 +22,142 @@
// Builds a btSimdFloat4 via btAssign128 from three RANDF_01 values followed by
// BT_NAN as the fourth argument.
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
return btAssign128(RANDF_01, RANDF_01, RANDF_01, BT_NAN); // w channel NaN
}
// Reference for the matrix product tested by Test_3x3mulM1M2: returns a new
// matrix whose row r is (m2.tdotx(m1[r]), m2.tdoty(m1[r]), m2.tdotz(m1[r])).
// Neither argument is modified.
static btMatrix3x3 M3x3mulM1M2_ref( const btMatrix3x3 &m1, const btMatrix3x3 &m2 )
static btMatrix3x3 M3x3mulM1M2_ref(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
{
return btMatrix3x3(
m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]),
m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]),
m2.tdotx(m1[1]), m2.tdoty(m1[1]), m2.tdotz(m1[1]),
m2.tdotx(m1[2]), m2.tdoty(m1[2]), m2.tdotz(m1[2]));
}
// Returns true when each of the four m_floats entries of `ref` and `other`
// differs by at most SIMD_EPSILON.
// NOTE(review): entry 3 (w) is part of the comparison; presumably a NaN there
// makes the result false - confirm the w entry of compared rows is always defined.
static bool fuzzyEqualSlow(const btVector3& ref, const btVector3& other)
static bool fuzzyEqualSlow(const btVector3 &ref, const btVector3 &other)
{
const btScalar epsilon = SIMD_EPSILON;
return ((btFabs(ref.m_floats[3]-other.m_floats[3])<=epsilon) &&
(btFabs(ref.m_floats[2]-other.m_floats[2])<=epsilon) &&
(btFabs(ref.m_floats[1]-other.m_floats[1])<=epsilon) &&
(btFabs(ref.m_floats[0]-other.m_floats[0])<=epsilon));
return ((btFabs(ref.m_floats[3] - other.m_floats[3]) <= epsilon) &&
(btFabs(ref.m_floats[2] - other.m_floats[2]) <= epsilon) &&
(btFabs(ref.m_floats[1] - other.m_floats[1]) <= epsilon) &&
(btFabs(ref.m_floats[0] - other.m_floats[0]) <= epsilon));
}
// Approximate inequality for matrices: returns 1 as soon as a pair of rows both
// compares unequal with the row operator!= and fails fuzzyEqualSlow; returns 0
// when every row pair is either equal or within the fuzzy tolerance.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
static int operator!=(const btMatrix3x3 &a, const btMatrix3x3 &b)
{
if( a.getRow(0) != b.getRow(0) )
if (a.getRow(0) != b.getRow(0))
{
if (!fuzzyEqualSlow(a.getRow(0),b.getRow(0)))
if (!fuzzyEqualSlow(a.getRow(0), b.getRow(0)))
{
return 1;
}
}
if( a.getRow(1) != b.getRow(1) )
if (a.getRow(1) != b.getRow(1))
{
if( !fuzzyEqualSlow(a.getRow(1),b.getRow(1)) )
return 1;
if (!fuzzyEqualSlow(a.getRow(1), b.getRow(1)))
return 1;
}
if( a.getRow(2) != b.getRow(2) )
if (a.getRow(2) != b.getRow(2))
{
if( !fuzzyEqualSlow(a.getRow(2),b.getRow(2)) )
if (!fuzzyEqualSlow(a.getRow(2), b.getRow(2)))
{
return 1;
}
}
return 0;
return 0;
}
// Test for btMatrix3x3 operator* (matrix times matrix).
// Phase 1: for each of ARRAY_SIZE random matrix pairs, compares
// M3x3mulM1M2_ref(in1[i], in2[i]) against (in1[i] * in2[i]); logs both results and
// returns -1 on the first mismatch.
// Phase 2: times both paths over LOOPCOUNT passes and logs the best or average
// pass (selected by gReportAverageTimes) divided by ARRAY_SIZE. Returns 0 on
// success.
// NOTE(review): the arrays are ordinary locals; nothing in this function sets up
// the guard pages mentioned in the comment below.
// NOTE(review): `i` is size_t but is logged with %ld; if vlog is printf-style the
// matching conversion would be %zu - confirm.
// NOTE(review): `bestTime = -1LL` relies on conversion of -1 to uint64_t to obtain
// the maximum value as the starting "best" time.
int Test_3x3mulM1M2(void)
{
// Init an array flanked by guard pages
btMatrix3x3 in1[ARRAY_SIZE];
btMatrix3x3 in2[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for( i = 0; i < ARRAY_SIZE; i++ )
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
out[i] = M3x3mulM1M2_ref(in1[i], in2[i]);
out2[i] = (in1[i] * in2[i]);
if( out[i] != out2[i] )
{
vlog( "Error - M3x3mulM1M2 result error! ");
vlog( "failure @ %ld\n", i);
btVector3 m0, m1, m2;
m0 = out[i].getRow(0);
m1 = out[i].getRow(1);
m2 = out[i].getRow(2);
vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
// Init an array flanked by guard pages
btMatrix3x3 in1[ARRAY_SIZE];
btMatrix3x3 in2[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
m0 = out2[i].getRow(0);
m1 = out2[i].getRow(1);
m2 = out2[i].getRow(2);
vlog( "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
// Init the data
size_t i, j;
for (i = 0; i < ARRAY_SIZE; i++)
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = M3x3mulM1M2_ref(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out2[i] = (in1[i] * in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog( "Timing:\n" );
vlog( "\t scalar\t vector\n" );
vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
return 0;
out[i] = M3x3mulM1M2_ref(in1[i], in2[i]);
out2[i] = (in1[i] * in2[i]);
if (out[i] != out2[i])
{
vlog("Error - M3x3mulM1M2 result error! ");
vlog("failure @ %ld\n", i);
btVector3 m0, m1, m2;
m0 = out[i].getRow(0);
m1 = out[i].getRow(1);
m2 = out[i].getRow(2);
vlog(
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
m0 = out2[i].getRow(0);
m1 = out2[i].getRow(1);
m2 = out2[i].getRow(2);
vlog(
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = M3x3mulM1M2_ref(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out2[i] = (in1[i] * in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog("Timing:\n");
vlog("\t scalar\t vector\n");
vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE);
return 0;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_3x3mulM1M2_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
// Runs the matrix * matrix test; the definition returns 0 on success and -1 on
// the first mismatch.
int Test_3x3mulM1M2(void);
int Test_3x3mulM1M2(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_3x3mulMV.h"
#include "vector.h"
@@ -25,88 +22,90 @@
// Builds a btSimdFloat4 via btAssign128 from three RANDF_01 values followed by
// BT_NAN as the fourth argument.
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128(RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
return btAssign128(RANDF_01, RANDF_01, RANDF_01, BT_NAN); // w channel NaN
}
// Reference for matrix * vector: returns the vector of dot products of each row
// of m (m[0], m[1], m[2]) with v.
static btVector3 M3x3mulMV_ref( const btMatrix3x3 &m, const btVector3 &v )
static btVector3 M3x3mulMV_ref(const btMatrix3x3 &m, const btVector3 &v)
{
return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
}
// Test for btMatrix3x3 * btVector3.
// Phase 1: for each of ARRAY_SIZE random (matrix, vector) pairs, compares
// M3x3mulMV_ref(in1[i], in2[i]) against (in1[i] * in2[i]); the pair fails when the
// sum of absolute differences over all four m_floats entries exceeds
// FLT_EPSILON * 4, in which case both results are logged and 1 is returned.
// Phase 2: times both paths over LOOPCOUNT passes and logs the best or average
// pass (selected by gReportAverageTimes) divided by ARRAY_SIZE. Returns 0 on
// success.
// NOTE(review): the failure value here is 1, whereas Test_3x3getRot, Test_3x3mulM
// and Test_3x3mulM1M2 return -1; both are non-zero, but the inconsistency may be
// unintentional.
// NOTE(review): the arrays are ordinary locals; nothing in this function sets up
// the guard pages mentioned in the comment below.
// NOTE(review): `i` is size_t but is logged with %ld; if vlog is printf-style the
// matching conversion would be %zu - confirm.
// NOTE(review): `bestTime = -1LL` relies on conversion of -1 to uint64_t to obtain
// the maximum value as the starting "best" time.
int Test_3x3mulMV(void)
{
// Init an array flanked by guard pages
btMatrix3x3 in1[ARRAY_SIZE];
btVector3 in2[ARRAY_SIZE];
btVector3 out[ARRAY_SIZE];
btVector3 out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for( i = 0; i < ARRAY_SIZE; i++ )
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
in2[i] = btVector3(rand_f4());
out[i] = M3x3mulMV_ref(in1[i], in2[i]);
out2[i] = (in1[i] * in2[i]);
// Init an array flanked by guard pages
btMatrix3x3 in1[ARRAY_SIZE];
btVector3 in2[ARRAY_SIZE];
btVector3 out[ARRAY_SIZE];
btVector3 out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for (i = 0; i < ARRAY_SIZE; i++)
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
in2[i] = btVector3(rand_f4());
out[i] = M3x3mulMV_ref(in1[i], in2[i]);
out2[i] = (in1[i] * in2[i]);
if (fabsf(out[i].m_floats[0] - out2[i].m_floats[0]) +
fabsf(out[i].m_floats[1] - out2[i].m_floats[1]) +
fabsf(out[i].m_floats[2] - out2[i].m_floats[2]) +
fabsf(out[i].m_floats[3] - out2[i].m_floats[3]) >
FLT_EPSILON * 4)
{
vlog("Error - M3x3mulMV result error! ");
vlog("failure @ %ld\n", i);
vlog(
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
out[i].m_floats[0], out[i].m_floats[1], out[i].m_floats[2], out[i].m_floats[3],
out2[i].m_floats[0], out2[i].m_floats[1], out2[i].m_floats[2], out2[i].m_floats[3]);
if( fabsf(out[i].m_floats[0] - out2[i].m_floats[0]) +
fabsf(out[i].m_floats[1] - out2[i].m_floats[1]) +
fabsf(out[i].m_floats[2] - out2[i].m_floats[2]) +
fabsf(out[i].m_floats[3] - out2[i].m_floats[3]) > FLT_EPSILON*4 )
{
vlog( "Error - M3x3mulMV result error! ");
vlog( "failure @ %ld\n", i);
vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
out[i].m_floats[0], out[i].m_floats[1], out[i].m_floats[2], out[i].m_floats[3],
out2[i].m_floats[0], out2[i].m_floats[1], out2[i].m_floats[2], out2[i].m_floats[3]);
return 1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = M3x3mulMV_ref(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out2[i] = (in1[i] * in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog( "Timing:\n" );
vlog( "\t scalar\t vector\n" );
vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
return 0;
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = M3x3mulMV_ref(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out2[i] = (in1[i] * in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog("Timing:\n");
vlog("\t scalar\t vector\n");
vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE);
return 0;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,15 +9,14 @@
#define BulletTest_Test_3x3mulMV_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
// Runs the matrix * vector test; the definition returns 0 on success and 1 on the
// first mismatch.
int Test_3x3mulMV(void);
int Test_3x3mulMV(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_3x3mulVM.h"
#include "vector.h"
@@ -25,88 +22,90 @@
// Builds a btSimdFloat4 via btAssign128 from three RANDF_01 values followed by
// BT_NAN as the fourth argument.
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
return btAssign128(RANDF_01, RANDF_01, RANDF_01, BT_NAN); // w channel NaN
}
// Reference for vector * matrix: returns (m.tdotx(v), m.tdoty(v), m.tdotz(v)).
static btVector3 M3x3mulVM_ref( const btVector3 &v, const btMatrix3x3 &m)
static btVector3 M3x3mulVM_ref(const btVector3 &v, const btMatrix3x3 &m)
{
return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
}
int Test_3x3mulVM(void)
{
// Init an array flanked by guard pages
btVector3 in1[ARRAY_SIZE];
btMatrix3x3 in2[ARRAY_SIZE];
btVector3 out[ARRAY_SIZE];
btVector3 out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for( i = 0; i < ARRAY_SIZE; i++ )
{
in1[i] = btVector3(rand_f4());
in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
out[i] = M3x3mulVM_ref(in1[i], in2[i]);
out2[i] = (in1[i] * in2[i]);
// Init an array flanked by guard pages
btVector3 in1[ARRAY_SIZE];
btMatrix3x3 in2[ARRAY_SIZE];
btVector3 out[ARRAY_SIZE];
btVector3 out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for (i = 0; i < ARRAY_SIZE; i++)
{
in1[i] = btVector3(rand_f4());
in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
out[i] = M3x3mulVM_ref(in1[i], in2[i]);
out2[i] = (in1[i] * in2[i]);
if (fabsf(out[i].m_floats[0] - out2[i].m_floats[0]) +
fabsf(out[i].m_floats[1] - out2[i].m_floats[1]) +
fabsf(out[i].m_floats[2] - out2[i].m_floats[2]) +
fabsf(out[i].m_floats[3] - out2[i].m_floats[3]) >
FLT_EPSILON * 4)
{
vlog("Error - M3x3mulVM result error! ");
vlog("failure @ %ld\n", i);
vlog(
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
out[i].m_floats[0], out[i].m_floats[1], out[i].m_floats[2], out[i].m_floats[3],
out2[i].m_floats[0], out2[i].m_floats[1], out2[i].m_floats[2], out2[i].m_floats[3]);
if( fabsf(out[i].m_floats[0] - out2[i].m_floats[0]) +
fabsf(out[i].m_floats[1] - out2[i].m_floats[1]) +
fabsf(out[i].m_floats[2] - out2[i].m_floats[2]) +
fabsf(out[i].m_floats[3] - out2[i].m_floats[3]) > FLT_EPSILON*4 )
{
vlog( "Error - M3x3mulVM result error! ");
vlog( "failure @ %ld\n", i);
vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
out[i].m_floats[0], out[i].m_floats[1], out[i].m_floats[2], out[i].m_floats[3],
out2[i].m_floats[0], out2[i].m_floats[1], out2[i].m_floats[2], out2[i].m_floats[3]);
return 1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = M3x3mulVM_ref(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out2[i] = (in1[i] * in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog( "Timing:\n" );
vlog( "\t scalar\t vector\n" );
vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
return 0;
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = M3x3mulVM_ref(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out2[i] = (in1[i] * in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog("Timing:\n");
vlog("\t scalar\t vector\n");
vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE);
return 0;
}
#endif //BT_USE_SSE_IN_API || BT_USE_NEON
#endif //BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_3x3mulVM_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_3x3mulVM(void);
int Test_3x3mulVM(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_3x3setRot.h"
#include "vector.h"
@@ -24,148 +22,149 @@
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
return btAssign128(RANDF_01, RANDF_01, RANDF_01, BT_NAN); // w channel NaN
}
static inline btSimdFloat4 qtrand_f4(void)
{
return btAssign128( RANDF_01, RANDF_01, RANDF_01, RANDF_01 );
return btAssign128(RANDF_01, RANDF_01, RANDF_01, RANDF_01);
}
static btMatrix3x3 M3x3setRot_ref( btMatrix3x3 &m, const btQuaternion &q )
static btMatrix3x3 M3x3setRot_ref(btMatrix3x3 &m, const btQuaternion &q)
{
btScalar d = q.length2();
btScalar s = btScalar(2.0) / d;
btScalar d = q.length2();
btScalar s = btScalar(2.0) / d;
btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
m.setValue(
btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
return m;
btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
m.setValue(
btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
return m;
}
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
static int operator!=(const btMatrix3x3 &a, const btMatrix3x3 &b)
{
int i;
btVector3 av3, bv3;
int i;
btVector3 av3, bv3;
for(i=0; i<3; i++)
{
av3 = a.getRow(i);
bv3 = b.getRow(i);
if( fabs(av3.m_floats[0] - bv3.m_floats[0]) +
fabs(av3.m_floats[1] - bv3.m_floats[1]) +
fabs(av3.m_floats[2] - bv3.m_floats[2]) > FLT_EPSILON * 4)
return 1;
}
return 0;
for (i = 0; i < 3; i++)
{
av3 = a.getRow(i);
bv3 = b.getRow(i);
if (fabs(av3.m_floats[0] - bv3.m_floats[0]) +
fabs(av3.m_floats[1] - bv3.m_floats[1]) +
fabs(av3.m_floats[2] - bv3.m_floats[2]) >
FLT_EPSILON * 4)
return 1;
}
return 0;
}
int Test_3x3setRot(void)
{
// Input/output arrays are plain locals; no guard-page allocation happens here
btMatrix3x3 in1[ARRAY_SIZE];
btQuaternion in2[ARRAY_SIZE];
btMatrix3x3 in3[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for( i = 0; i < ARRAY_SIZE; i++ )
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
in2[i] = btQuaternion(qtrand_f4());
in3[i] = in1[i];
out[i] = M3x3setRot_ref(in1[i], in2[i]);
in3[i].setRotation(in2[i]);
out2[i] = in3[i];
// Input/output arrays are plain locals; no guard-page allocation happens here
btMatrix3x3 in1[ARRAY_SIZE];
btQuaternion in2[ARRAY_SIZE];
btMatrix3x3 in3[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
if( out[i] != out2[i] )
{
vlog( "Error - M3x3setRot result error! ");
vlog( "failure @ %ld\n", i);
btVector3 m0, m1, m2;
m0 = out[i].getRow(0);
m1 = out[i].getRow(1);
m2 = out[i].getRow(2);
vlog( "\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
"\n (%10.7f, %10.7f, %10.7f, %10.7f) "
"\n (%10.7f, %10.7f, %10.7f, %10.7f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
// Init the data
size_t i, j;
for (i = 0; i < ARRAY_SIZE; i++)
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
in2[i] = btQuaternion(qtrand_f4());
in3[i] = in1[i];
m0 = out2[i].getRow(0);
m1 = out2[i].getRow(1);
m2 = out2[i].getRow(2);
vlog( "\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) "
"\n (%10.7f, %10.7f, %10.7f, %10.7f) "
"\n (%10.7f, %10.7f, %10.7f, %10.7f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
out[i] = M3x3setRot_ref(in1[i], in2[i]);
in3[i].setRotation(in2[i]);
out2[i] = in3[i];
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = M3x3setRot_ref(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
{
in3[i].setRotation(in2[i]);
out2[i] = in3[i];
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog( "Timing:\n" );
vlog( "\t scalar\t vector\n" );
vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
return 0;
if (out[i] != out2[i])
{
vlog("Error - M3x3setRot result error! ");
vlog("failure @ %ld\n", i);
btVector3 m0, m1, m2;
m0 = out[i].getRow(0);
m1 = out[i].getRow(1);
m2 = out[i].getRow(2);
vlog(
"\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
"\n (%10.7f, %10.7f, %10.7f, %10.7f) "
"\n (%10.7f, %10.7f, %10.7f, %10.7f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
m0 = out2[i].getRow(0);
m1 = out2[i].getRow(1);
m2 = out2[i].getRow(2);
vlog(
"\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) "
"\n (%10.7f, %10.7f, %10.7f, %10.7f) "
"\n (%10.7f, %10.7f, %10.7f, %10.7f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = M3x3setRot_ref(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
{
in3[i].setRotation(in2[i]);
out2[i] = in3[i];
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog("Timing:\n");
vlog("\t scalar\t vector\n");
vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE);
return 0;
}
#endif //BT_USE_SSE_IN_API || BT_USE_NEON
#endif //BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_3x3setRot_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_3x3setRot(void);
int Test_3x3setRot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_3x3timesTranspose.h"
#include "vector.h"
@@ -25,93 +22,95 @@
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF, RANDF, RANDF, BT_NAN ); // w channel NaN
return btAssign128(RANDF, RANDF, RANDF, BT_NAN); // w channel NaN
}
static btMatrix3x3 timesTranspose( const btMatrix3x3 &in, const btMatrix3x3 &m )
static btMatrix3x3 timesTranspose(const btMatrix3x3 &in, const btMatrix3x3 &m)
{
btVector3 m_el[3] = { in[0], in[1], in[2] };
btVector3 m_el[3] = {in[0], in[1], in[2]};
return btMatrix3x3(
m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
}
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
static int operator!=(const btMatrix3x3 &a, const btMatrix3x3 &b)
{
if( a.getRow(0) != b.getRow(0) )
return 1;
if( a.getRow(1) != b.getRow(1) )
return 1;
if( a.getRow(2) != b.getRow(2) )
return 1;
return 0;
if (a.getRow(0) != b.getRow(0))
return 1;
if (a.getRow(1) != b.getRow(1))
return 1;
if (a.getRow(2) != b.getRow(2))
return 1;
return 0;
}
int Test_3x3timesTranspose(void)
{
// Input/output arrays are plain locals; no guard-page allocation happens here
btMatrix3x3 in1[ARRAY_SIZE];
btMatrix3x3 in2[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for( i = 0; i < ARRAY_SIZE; i++ )
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
out[i] = timesTranspose(in1[i], in2[i]);
out2[i] = in1[i].timesTranspose(in2[i]);
if( out[i] != out2[i] )
{
printf( "failure @ %ld\n", i);
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++) {
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = timesTranspose(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++) {
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = in1[i].timesTranspose(in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog( "Timing:\n" );
vlog( "\t scalar\t vector\n" );
vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
return 0;
// Input/output arrays are plain locals; no guard-page allocation happens here
btMatrix3x3 in1[ARRAY_SIZE];
btMatrix3x3 in2[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for (i = 0; i < ARRAY_SIZE; i++)
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
out[i] = timesTranspose(in1[i], in2[i]);
out2[i] = in1[i].timesTranspose(in2[i]);
if (out[i] != out2[i])
{
printf("failure @ %ld\n", i);
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = timesTranspose(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = in1[i].timesTranspose(in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog("Timing:\n");
vlog("\t scalar\t vector\n");
vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE);
return 0;
}
#endif //BT_USE_SSE_IN_API || BT_USE_NEON
#endif //BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_3x3timesTranspose_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_3x3timesTranspose(void);
int Test_3x3timesTranspose(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_3x3transpose.h"
#include "vector.h"
@@ -20,97 +18,98 @@
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 1024
#define ARRAY_SIZE 1024
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF, RANDF, RANDF, BT_NAN ); // w channel NaN
return btAssign128(RANDF, RANDF, RANDF, BT_NAN); // w channel NaN
}
static btMatrix3x3 Transpose( btMatrix3x3 &in )
static btMatrix3x3 Transpose(btMatrix3x3 &in)
{
btVector3 row0 = in.getRow(0);
btVector3 row1 = in.getRow(1);
btVector3 row2 = in.getRow(2);
btVector3 col0 = btAssign128(row0.x(), row1.x(), row2.x(), 0 );
btVector3 col1 = btAssign128(row0.y(), row1.y(), row2.y(), 0 );
btVector3 row0 = in.getRow(0);
btVector3 row1 = in.getRow(1);
btVector3 row2 = in.getRow(2);
btVector3 col0 = btAssign128(row0.x(), row1.x(), row2.x(), 0);
btVector3 col1 = btAssign128(row0.y(), row1.y(), row2.y(), 0);
btVector3 col2 = btAssign128(row0.z(), row1.z(), row2.z(), 0);
return btMatrix3x3( col0, col1, col2);
return btMatrix3x3(col0, col1, col2);
}
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
static int operator!=(const btMatrix3x3 &a, const btMatrix3x3 &b)
{
if( a.getRow(0) != b.getRow(0) )
return 1;
if( a.getRow(1) != b.getRow(1) )
return 1;
if( a.getRow(2) != b.getRow(2) )
return 1;
return 0;
if (a.getRow(0) != b.getRow(0))
return 1;
if (a.getRow(1) != b.getRow(1))
return 1;
if (a.getRow(2) != b.getRow(2))
return 1;
return 0;
}
int Test_3x3transpose(void)
{
// Input/output arrays are plain locals; no guard-page allocation happens here
btMatrix3x3 in[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
// Init the data
size_t i, j;
for( i = 0; i < ARRAY_SIZE; i++ )
{
in[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
out[i] = Transpose(in[i]);
out2[i] = in[i].transpose();
if( out[i] != out2[i] )
{
printf( "failure @ %ld\n", i);
return -1;
}
}
// Input/output arrays are plain locals; no guard-page allocation happens here
btMatrix3x3 in[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++) {
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = Transpose(in[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
// Init the data
size_t i, j;
for (i = 0; i < ARRAY_SIZE; i++)
{
in[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++) {
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = in[i].transpose();
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog( "Timing:\n" );
vlog( "\t scalar\t vector\n" );
vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
return 0;
out[i] = Transpose(in[i]);
out2[i] = in[i].transpose();
if (out[i] != out2[i])
{
printf("failure @ %ld\n", i);
return -1;
}
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = Transpose(in[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = in[i].transpose();
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog("Timing:\n");
vlog("\t scalar\t vector\n");
vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE);
return 0;
}
#endif //BT_USE_SSE_IN_API || BT_USE_NEON
#endif //BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_3x3transpose_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_3x3transpose(void);
int Test_3x3transpose(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_3x3transposeTimes.h"
#include "vector.h"
@@ -24,145 +22,148 @@
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
return btAssign128(RANDF_01, RANDF_01, RANDF_01, BT_NAN); // w channel NaN
}
static btMatrix3x3 TransposeTimesReference( const btMatrix3x3 &in, const btMatrix3x3 &m )
static btMatrix3x3 TransposeTimesReference(const btMatrix3x3 &in, const btMatrix3x3 &m)
{
btVector3 m_el[3] = { in[0], in[1], in[2] };
btSimdFloat4 r0 = btAssign128(m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
0.0f );
btSimdFloat4 r1 = btAssign128( m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
0.0f );
btSimdFloat4 r2 = btAssign128( m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z(),
0.0f );
return btMatrix3x3( r0, r1, r2 );
btVector3 m_el[3] = {in[0], in[1], in[2]};
btSimdFloat4 r0 = btAssign128(m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
0.0f);
btSimdFloat4 r1 = btAssign128(m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
0.0f);
btSimdFloat4 r2 = btAssign128(m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z(),
0.0f);
return btMatrix3x3(r0, r1, r2);
}
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
static int operator!=(const btMatrix3x3 &a, const btMatrix3x3 &b)
{
if( a.getRow(0) != b.getRow(0) )
return 1;
if( a.getRow(1) != b.getRow(1) )
return 1;
if( a.getRow(2) != b.getRow(2) )
return 1;
return 0;
if (a.getRow(0) != b.getRow(0))
return 1;
if (a.getRow(1) != b.getRow(1))
return 1;
if (a.getRow(2) != b.getRow(2))
return 1;
return 0;
}
int Test_3x3transposeTimes(void)
{
// Input/output arrays are plain locals; no guard-page allocation happens here
btMatrix3x3 in1[ARRAY_SIZE];
btMatrix3x3 in2[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
float maxRelativeError = 0.f;
// Init the data
size_t i, j;
for( i = 0; i < ARRAY_SIZE; i++ )
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
out[i] = TransposeTimesReference(in1[i], in2[i]);
out2[i] = in1[i].transposeTimes(in2[i]);
if( out[i] != out2[i] )
{
// Input/output arrays are plain locals; no guard-page allocation happens here
btMatrix3x3 in1[ARRAY_SIZE];
btMatrix3x3 in2[ARRAY_SIZE];
btMatrix3x3 out[ARRAY_SIZE];
btMatrix3x3 out2[ARRAY_SIZE];
float maxRelativeError = 0.f;
// Init the data
size_t i, j;
for (i = 0; i < ARRAY_SIZE; i++)
{
in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
out[i] = TransposeTimesReference(in1[i], in2[i]);
out2[i] = in1[i].transposeTimes(in2[i]);
if (out[i] != out2[i])
{
float relativeError = 0.f;
for (int column=0;column<3;column++)
for (int row=0;row<3;row++)
relativeError = btMax(relativeError,btFabs(out2[i][row][column] - out[i][row][column]) / out[i][row][column]);
for (int column = 0; column < 3; column++)
for (int row = 0; row < 3; row++)
relativeError = btMax(relativeError, btFabs(out2[i][row][column] - out[i][row][column]) / out[i][row][column]);
if (relativeError>1e-6)
if (relativeError > 1e-6)
{
vlog( "failure @ %ld\n", i);
vlog("failure @ %ld\n", i);
btVector3 m0, m1, m2;
m0 = out[i].getRow(0);
m1 = out[i].getRow(1);
m2 = out[i].getRow(2);
vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
vlog(
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
m0 = out2[i].getRow(0);
m1 = out2[i].getRow(1);
m2 = out2[i].getRow(2);
vlog( "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
vlog(
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) "
"\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
return -1;
} else
}
else
{
if (relativeError>maxRelativeError)
if (relativeError > maxRelativeError)
maxRelativeError = relativeError;
}
}
}
}
}
if (maxRelativeError)
{
printf("Warning: maxRelativeError = %e\n",maxRelativeError);
printf("Warning: maxRelativeError = %e\n", maxRelativeError);
}
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++) {
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = TransposeTimesReference(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++) {
startTime = ReadTicks();
for( i = 0; i < ARRAY_SIZE; i++ )
out[i] = in1[i].transposeTimes(in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog( "Timing:\n" );
vlog( "\t scalar\t vector\n" );
vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
return 0;
uint64_t scalarTime, vectorTime;
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = TransposeTimesReference(in1[i], in2[i]);
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= LOOPCOUNT;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < LOOPCOUNT; j++)
{
startTime = ReadTicks();
for (i = 0; i < ARRAY_SIZE; i++)
out[i] = in1[i].transposeTimes(in2[i]);
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= LOOPCOUNT;
vlog("Timing:\n");
vlog("\t scalar\t vector\n");
vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE);
return 0;
}
#endif //BT_USE_SSE_IN_API || BT_USE_NEON
#endif //BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_3x3transposeTimes_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_3x3transposeTimes(void);
int Test_3x3transposeTimes(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc., Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_btDbvt.h"
#include "vector.h"
@@ -20,55 +18,49 @@
#include <BulletCollision/BroadphaseCollision/btDbvt.h>
// reference code for testing purposes
SIMD_FORCE_INLINE bool Intersect_ref( btDbvtAabbMm& a, btDbvtAabbMm& b)
SIMD_FORCE_INLINE bool Intersect_ref(btDbvtAabbMm& a, btDbvtAabbMm& b)
{
return( (a.tMins().x()<=b.tMaxs().x())&&
(a.tMaxs().x()>=b.tMins().x())&&
(a.tMins().y()<=b.tMaxs().y())&&
(a.tMaxs().y()>=b.tMins().y())&&
(a.tMins().z()<=b.tMaxs().z())&&
(a.tMaxs().z()>=b.tMins().z()));
}
SIMD_FORCE_INLINE btScalar Proximity_ref( btDbvtAabbMm& a,
btDbvtAabbMm& b)
{
const btVector3 d=(a.tMins()+a.tMaxs())-(b.tMins()+b.tMaxs());
return(btFabs(d.x())+btFabs(d.y())+btFabs(d.z()));
return ((a.tMins().x() <= b.tMaxs().x()) &&
(a.tMaxs().x() >= b.tMins().x()) &&
(a.tMins().y() <= b.tMaxs().y()) &&
(a.tMaxs().y() >= b.tMins().y()) &&
(a.tMins().z() <= b.tMaxs().z()) &&
(a.tMaxs().z() >= b.tMins().z()));
}
SIMD_FORCE_INLINE int Select_ref( btDbvtAabbMm& o,
btDbvtAabbMm& a,
btDbvtAabbMm& b)
SIMD_FORCE_INLINE btScalar Proximity_ref(btDbvtAabbMm& a,
btDbvtAabbMm& b)
{
return(Proximity_ref(o,a)<Proximity_ref(o,b)?0:1);
const btVector3 d = (a.tMins() + a.tMaxs()) - (b.tMins() + b.tMaxs());
return (btFabs(d.x()) + btFabs(d.y()) + btFabs(d.z()));
}
SIMD_FORCE_INLINE void Merge_ref( btDbvtAabbMm& a,
btDbvtAabbMm& b,
btDbvtAabbMm& r)
SIMD_FORCE_INLINE int Select_ref(btDbvtAabbMm& o,
btDbvtAabbMm& a,
btDbvtAabbMm& b)
{
//
//Changing '3' into '4' to compare with the vector code which changes all 4 floats.
//Erwin: don't do this because the 4th component is ignored and not computed on non-vector code (there is no NEON version and scalar is just 3 components)
//
for(int i=0;i<3;++i)
return (Proximity_ref(o, a) < Proximity_ref(o, b) ? 0 : 1);
}
SIMD_FORCE_INLINE void Merge_ref(btDbvtAabbMm& a,
btDbvtAabbMm& b,
btDbvtAabbMm& r)
{
//
//Changing '3' into '4' to compare with the vector code which changes all 4 floats.
//Erwin: don't do this because the 4th component is ignored and not computed on non-vector code (there is no NEON version and scalar is just 3 components)
//
for (int i = 0; i < 3; ++i)
{
if(a.tMins().m_floats[i]<b.tMins().m_floats[i])
r.tMins().m_floats[i] = a.tMins().m_floats[i];
else
r.tMins().m_floats[i] = b.tMins().m_floats[i];
if(a.tMaxs().m_floats[i]>b.tMaxs().m_floats[i])
r.tMaxs().m_floats[i]=a.tMaxs().m_floats[i];
else
r.tMaxs().m_floats[i]=b.tMaxs().m_floats[i];
if (a.tMins().m_floats[i] < b.tMins().m_floats[i])
r.tMins().m_floats[i] = a.tMins().m_floats[i];
else
r.tMins().m_floats[i] = b.tMins().m_floats[i];
if (a.tMaxs().m_floats[i] > b.tMaxs().m_floats[i])
r.tMaxs().m_floats[i] = a.tMaxs().m_floats[i];
else
r.tMaxs().m_floats[i] = b.tMaxs().m_floats[i];
}
}
/*
@@ -99,244 +91,235 @@ SIMD_FORCE_INLINE void Merge_ref( btDbvtAabbMm& a,
int Test_btDbvt(void)
{
btDbvtAabbMm a[DATA_SIZE], b[DATA_SIZE], c[DATA_SIZE];
btDbvtAabbMm a_ref[DATA_SIZE], b_ref[DATA_SIZE], c_ref[DATA_SIZE];
int i;
bool Intersect_Test_Res[DATA_SIZE], Intersect_Ref_Res[DATA_SIZE];
int Select_Test_Res[DATA_SIZE], Select_Ref_Res[DATA_SIZE];
for (i = 0; i < DATA_SIZE; i++)
{
a[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
a_ref[i].tMins().m_floats[0] = a[i].tMins().m_floats[0];
a_ref[i].tMins().m_floats[1] = a[i].tMins().m_floats[1];
a_ref[i].tMins().m_floats[2] = a[i].tMins().m_floats[2];
a_ref[i].tMins().m_floats[3] = a[i].tMins().m_floats[3];
a_ref[i].tMaxs().m_floats[0] = a[i].tMaxs().m_floats[0];
a_ref[i].tMaxs().m_floats[1] = a[i].tMaxs().m_floats[1];
a_ref[i].tMaxs().m_floats[2] = a[i].tMaxs().m_floats[2];
a_ref[i].tMaxs().m_floats[3] = a[i].tMaxs().m_floats[3];
b_ref[i].tMins().m_floats[0] = b[i].tMins().m_floats[0];
b_ref[i].tMins().m_floats[1] = b[i].tMins().m_floats[1];
b_ref[i].tMins().m_floats[2] = b[i].tMins().m_floats[2];
b_ref[i].tMins().m_floats[3] = b[i].tMins().m_floats[3];
b_ref[i].tMaxs().m_floats[0] = b[i].tMaxs().m_floats[0];
b_ref[i].tMaxs().m_floats[1] = b[i].tMaxs().m_floats[1];
b_ref[i].tMaxs().m_floats[2] = b[i].tMaxs().m_floats[2];
b_ref[i].tMaxs().m_floats[3] = b[i].tMaxs().m_floats[3];
c_ref[i].tMins().m_floats[0] = c[i].tMins().m_floats[0];
c_ref[i].tMins().m_floats[1] = c[i].tMins().m_floats[1];
c_ref[i].tMins().m_floats[2] = c[i].tMins().m_floats[2];
c_ref[i].tMins().m_floats[3] = c[i].tMins().m_floats[3];
c_ref[i].tMaxs().m_floats[0] = c[i].tMaxs().m_floats[0];
c_ref[i].tMaxs().m_floats[1] = c[i].tMaxs().m_floats[1];
c_ref[i].tMaxs().m_floats[2] = c[i].tMaxs().m_floats[2];
c_ref[i].tMaxs().m_floats[3] = c[i].tMaxs().m_floats[3];
}
#if 1
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Test_Res[i] = Intersect(a[i], b[i]);
Intersect_Ref_Res[i] = Intersect_ref(a_ref[i], b_ref[i]);
if(Intersect_Test_Res[i] != Intersect_Ref_Res[i])
{
printf("Diff on %d\n", i);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
}
}
#endif
uint64_t scalarTime;
uint64_t vectorTime;
size_t j;
////////////////////////////////////
//
// Time and Test Intersect
//
////////////////////////////////////
btDbvtAabbMm a[DATA_SIZE], b[DATA_SIZE], c[DATA_SIZE];
btDbvtAabbMm a_ref[DATA_SIZE], b_ref[DATA_SIZE], c_ref[DATA_SIZE];
int i;
bool Intersect_Test_Res[DATA_SIZE], Intersect_Ref_Res[DATA_SIZE];
int Select_Test_Res[DATA_SIZE], Select_Ref_Res[DATA_SIZE];
for (i = 0; i < DATA_SIZE; i++)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
a[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
a_ref[i].tMins().m_floats[0] = a[i].tMins().m_floats[0];
a_ref[i].tMins().m_floats[1] = a[i].tMins().m_floats[1];
a_ref[i].tMins().m_floats[2] = a[i].tMins().m_floats[2];
a_ref[i].tMins().m_floats[3] = a[i].tMins().m_floats[3];
a_ref[i].tMaxs().m_floats[0] = a[i].tMaxs().m_floats[0];
a_ref[i].tMaxs().m_floats[1] = a[i].tMaxs().m_floats[1];
a_ref[i].tMaxs().m_floats[2] = a[i].tMaxs().m_floats[2];
a_ref[i].tMaxs().m_floats[3] = a[i].tMaxs().m_floats[3];
b_ref[i].tMins().m_floats[0] = b[i].tMins().m_floats[0];
b_ref[i].tMins().m_floats[1] = b[i].tMins().m_floats[1];
b_ref[i].tMins().m_floats[2] = b[i].tMins().m_floats[2];
b_ref[i].tMins().m_floats[3] = b[i].tMins().m_floats[3];
b_ref[i].tMaxs().m_floats[0] = b[i].tMaxs().m_floats[0];
b_ref[i].tMaxs().m_floats[1] = b[i].tMaxs().m_floats[1];
b_ref[i].tMaxs().m_floats[2] = b[i].tMaxs().m_floats[2];
b_ref[i].tMaxs().m_floats[3] = b[i].tMaxs().m_floats[3];
c_ref[i].tMins().m_floats[0] = c[i].tMins().m_floats[0];
c_ref[i].tMins().m_floats[1] = c[i].tMins().m_floats[1];
c_ref[i].tMins().m_floats[2] = c[i].tMins().m_floats[2];
c_ref[i].tMins().m_floats[3] = c[i].tMins().m_floats[3];
c_ref[i].tMaxs().m_floats[0] = c[i].tMaxs().m_floats[0];
c_ref[i].tMaxs().m_floats[1] = c[i].tMaxs().m_floats[1];
c_ref[i].tMaxs().m_floats[2] = c[i].tMaxs().m_floats[2];
c_ref[i].tMaxs().m_floats[3] = c[i].tMaxs().m_floats[3];
}
#if 1
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Test_Res[i] = Intersect(a[i], b[i]);
Intersect_Ref_Res[i] = Intersect_ref(a_ref[i], b_ref[i]);
if (Intersect_Test_Res[i] != Intersect_Ref_Res[i])
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Ref_Res[i] = Intersect_ref(a_ref[i], b_ref[i]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
printf("Diff on %d\n", i);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
}
}
#endif
uint64_t scalarTime;
uint64_t vectorTime;
size_t j;
////////////////////////////////////
//
// Time and Test Intersect
//
////////////////////////////////////
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Test_Res[i] = Intersect(a[i], b[i]);
}
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Ref_Res[i] = Intersect_ref(a_ref[i], b_ref[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Intersect Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
for (i = 0; i < DATA_SIZE; i++)
{
if(Intersect_Test_Res[i] != Intersect_Ref_Res[i])
{
printf("Intersect fail at %d\n", i);
return 1;
}
}
////////////////////////////////////
//
// Time and Test Merge
//
////////////////////////////////////
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Merge_ref(a_ref[i], b_ref[i], c_ref[i]);
}
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Test_Res[i] = Intersect(a[i], b[i]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog("Intersect Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT, TicksToCycles(vectorTime) / LOOPCOUNT);
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
for (i = 0; i < DATA_SIZE; i++)
{
if (Intersect_Test_Res[i] != Intersect_Ref_Res[i])
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Merge(a[i], b[i], c[i]);
}
printf("Intersect fail at %d\n", i);
return 1;
}
}
////////////////////////////////////
//
// Time and Test Merge
//
////////////////////////////////////
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Merge_ref(a_ref[i], b_ref[i], c_ref[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Merge Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
/*
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Merge(a[i], b[i], c[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog("Merge Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT, TicksToCycles(vectorTime) / LOOPCOUNT);
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
/*
c [0] float32_t 0.00455523
[1] float32_t 0.559712
[2] float32_t 0.0795838
@@ -360,136 +343,127 @@ c [0] float32_t 0.829904
[2] float32_t 0.961654
[3] float32_t 0.522878
*/
for (i = 0; i < DATA_SIZE; i++)
{
//ignore 4th component because it is not computed in all code-paths
if( (fabs(c[i].tMaxs().m_floats[0] - c_ref[i].tMaxs().m_floats[0]) > 0.001) ||
(fabs(c[i].tMaxs().m_floats[1] - c_ref[i].tMaxs().m_floats[1]) > 0.001) ||
(fabs(c[i].tMaxs().m_floats[2] - c_ref[i].tMaxs().m_floats[2]) > 0.001) ||
// (fabs(c[i].tMaxs().m_floats[3] - c_ref[i].tMaxs().m_floats[3]) > 0.001) ||
(fabs(c[i].tMins().m_floats[0] - c_ref[i].tMins().m_floats[0]) > 0.001) ||
(fabs(c[i].tMins().m_floats[1] - c_ref[i].tMins().m_floats[1]) > 0.001) ||
(fabs(c[i].tMins().m_floats[2] - c_ref[i].tMins().m_floats[2]) > 0.001)
//|| (fabs(c[i].tMins().m_floats[3] - c_ref[i].tMins().m_floats[3]) > 0.001)
)
//if((c[i].tMaxs().m_floats[0] != c_ref[i].tMaxs().m_floats[0]) || (c[i].tMaxs().m_floats[1] != c_ref[i].tMaxs().m_floats[1]) || (c[i].tMaxs().m_floats[2] != c_ref[i].tMaxs().m_floats[2]) || (c[i].tMaxs().m_floats[3] != c_ref[i].tMaxs().m_floats[3]) || (c[i].tMins().m_floats[0] != c_ref[i].tMins().m_floats[0]) || (c[i].tMins().m_floats[1] != c_ref[i].tMins().m_floats[1]) || (c[i].tMins().m_floats[2] != c_ref[i].tMins().m_floats[2]) || (c[i].tMins().m_floats[3] != c_ref[i].tMins().m_floats[3]))
{
printf("Merge fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
return 1;
}
}
////////////////////////////////////
//
// Time and Test Select
//
////////////////////////////////////
for (i = 0; i < DATA_SIZE; i++)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
//ignore 4th component because it is not computed in all code-paths
if ((fabs(c[i].tMaxs().m_floats[0] - c_ref[i].tMaxs().m_floats[0]) > 0.001) ||
(fabs(c[i].tMaxs().m_floats[1] - c_ref[i].tMaxs().m_floats[1]) > 0.001) ||
(fabs(c[i].tMaxs().m_floats[2] - c_ref[i].tMaxs().m_floats[2]) > 0.001) ||
// (fabs(c[i].tMaxs().m_floats[3] - c_ref[i].tMaxs().m_floats[3]) > 0.001) ||
(fabs(c[i].tMins().m_floats[0] - c_ref[i].tMins().m_floats[0]) > 0.001) ||
(fabs(c[i].tMins().m_floats[1] - c_ref[i].tMins().m_floats[1]) > 0.001) ||
(fabs(c[i].tMins().m_floats[2] - c_ref[i].tMins().m_floats[2]) > 0.001)
//|| (fabs(c[i].tMins().m_floats[3] - c_ref[i].tMins().m_floats[3]) > 0.001)
)
//if((c[i].tMaxs().m_floats[0] != c_ref[i].tMaxs().m_floats[0]) || (c[i].tMaxs().m_floats[1] != c_ref[i].tMaxs().m_floats[1]) || (c[i].tMaxs().m_floats[2] != c_ref[i].tMaxs().m_floats[2]) || (c[i].tMaxs().m_floats[3] != c_ref[i].tMaxs().m_floats[3]) || (c[i].tMins().m_floats[0] != c_ref[i].tMins().m_floats[0]) || (c[i].tMins().m_floats[1] != c_ref[i].tMins().m_floats[1]) || (c[i].tMins().m_floats[2] != c_ref[i].tMins().m_floats[2]) || (c[i].tMins().m_floats[3] != c_ref[i].tMins().m_floats[3]))
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Select_Ref_Res[i] = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Select_Test_Res[i] = Select(a[i], b[i], c[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Select Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
for (i = 0; i < DATA_SIZE; i++)
{
Select_Ref_Res[i] = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
Select_Test_Res[i] = Select(a[i], b[i], c[i]);
if(Select_Test_Res[i] != Select_Ref_Res[i])
{
printf("Select fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
printf("Merge fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
return 1;
}
}
return 0;
}
////////////////////////////////////
//
// Time and Test Select
//
////////////////////////////////////
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Select_Ref_Res[i] = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Select_Test_Res[i] = Select(a[i], b[i], c[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog("Select Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT, TicksToCycles(vectorTime) / LOOPCOUNT);
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
for (i = 0; i < DATA_SIZE; i++)
{
Select_Ref_Res[i] = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
Select_Test_Res[i] = Select(a[i], b[i], c[i]);
if (Select_Test_Res[i] != Select_Ref_Res[i])
{
printf("Select fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
return 1;
}
}
return 0;
}
#endif

View File

@@ -9,11 +9,12 @@
#define BulletTest_Test_btDbvt_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_btDbvt(void);
int Test_btDbvt(void);
#ifdef __cplusplus
}
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_dot3.h"
#include "vector.h"
@@ -20,10 +18,10 @@
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static btVector3 dot3_ref( const btVector3 &, const btVector3 &, const btVector3 &, const btVector3 &);
static btVector3 dot3_ref( const btVector3 &v, const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
static btVector3 dot3_ref(const btVector3 &, const btVector3 &, const btVector3 &, const btVector3 &);
static btVector3 dot3_ref(const btVector3 &v, const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
{
return btVector3( v.dot(v1), v.dot(v2), v.dot(v3));
return btVector3(v.dot(v1), v.dot(v2), v.dot(v3));
}
/*
@@ -44,110 +42,112 @@ SIMD_FORCE_INLINE int operator!=(const btVector3 &s, const btVector3 &v)
#endif
}
*/
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
int Test_dot3(void)
{
btVector3 v, v1, v2, v3;
btVector3 v, v1, v2, v3;
#define DATA_SIZE 1024
btVector3 vec3_arr[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
btVector3 vec3_arr3[DATA_SIZE];
btVector3 res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
btVector3 correct, test;
for( k = 0; k < DATA_SIZE; k++ )
btVector3 res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
btVector3 correct, test;
for (k = 0; k < DATA_SIZE; k++)
{
vec3_arr[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
vec3_arr1[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
vec3_arr2[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN ));
vec3_arr3[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
vec3_arr[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
vec3_arr1[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
vec3_arr2[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
vec3_arr3[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
correct = dot3_ref(vec3_arr[k], vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
test = vec3_arr[k].dot3( vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
if( correct != test )
test = vec3_arr[k].dot3(vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
if (correct != test)
{
vlog( "Error (%ld) - dot3 result error! *{%a, %a, %a, %a} != {%a, %a, %a, %a} \n", k,
correct.x(), correct.y(), correct.z(), correct.w(),
test.x(), test.y(), test.z(), test.w() );
vlog("Error (%ld) - dot3 result error! *{%a, %a, %a, %a} != {%a, %a, %a, %a} \n", k,
correct.x(), correct.y(), correct.z(), correct.w(),
test.x(), test.y(), test.z(), test.w());
return 1;
}
}
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t k32 = (k & (DATA_SIZE-1));
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
size_t k32 = (k & (DATA_SIZE - 1));
res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t k32 = (k & (DATA_SIZE-1));
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
size_t k32 = (k & (DATA_SIZE - 1));
res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT, TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_dot3_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_dot3(void);
int Test_dot3(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_maxdot.h"
#include "vector.h"
@@ -20,114 +17,109 @@
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static long maxdot_ref( const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult );
static long maxdot_ref(const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult);
#ifdef __arm__
#define MAX_LOG2_SIZE 9
#define MAX_LOG2_SIZE 9
#else
#define MAX_LOG2_SIZE 10
#define MAX_LOG2_SIZE 10
#endif
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
#define LOOPCOUNT 10
int Test_maxdot(void)
{
// Init an array flanked by guard pages
btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL );
float *fp = (float*) data;
long correct, test;
btVector3 localScaling( 0.1f, 0.2f, 0.3f);
size_t size;
// Init the data
size_t i;
for( i = 0; i < MAX_SIZE; i++ )
{
fp[4*i] = (int32_t) RANDF_16;
fp[4*i+1] = (int32_t) RANDF_16;
fp[4*i+2] = (int32_t) RANDF_16;
fp[4*i+3] = BT_NAN; // w channel NaN
}
float correctDot, testDot;
fp = (float*) localScaling;
// Init an array flanked by guard pages
btSimdFloat4 *data = (btSimdFloat4 *)GuardCalloc(1, MAX_SIZE * sizeof(btSimdFloat4), NULL);
float *fp = (float *)data;
long correct, test;
btVector3 localScaling(0.1f, 0.2f, 0.3f);
size_t size;
// Init the data
size_t i;
for (i = 0; i < MAX_SIZE; i++)
{
fp[4 * i] = (int32_t)RANDF_16;
fp[4 * i + 1] = (int32_t)RANDF_16;
fp[4 * i + 2] = (int32_t)RANDF_16;
fp[4 * i + 3] = BT_NAN; // w channel NaN
}
float correctDot, testDot;
fp = (float *)localScaling;
float maxRelativeError = 0.f;
for( size = 1; size <= MAX_SIZE; size++ )
{
float *in = (float*)(data + MAX_SIZE - size);
size_t position;
for( position = 0; position < size; position++ )
{
float *biggest = in + position * 4;
float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] };
biggest[0] += LARGE_FLOAT17;
biggest[1] += LARGE_FLOAT17;
biggest[2] += LARGE_FLOAT17;
biggest[3] += LARGE_FLOAT17;
correctDot = BT_NAN;
testDot = BT_NAN;
correct = maxdot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
test = localScaling.maxDot( (btVector3*) in, size, testDot);
if( test < 0 || test >= size )
{
vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
continue;
}
if( correct != test )
for (size = 1; size <= MAX_SIZE; size++)
{
float *in = (float *)(data + MAX_SIZE - size);
size_t position;
for (position = 0; position < size; position++)
{
float *biggest = in + position * 4;
float old[4] = {biggest[0], biggest[1], biggest[2], biggest[3]};
biggest[0] += LARGE_FLOAT17;
biggest[1] += LARGE_FLOAT17;
biggest[2] += LARGE_FLOAT17;
biggest[3] += LARGE_FLOAT17;
correctDot = BT_NAN;
testDot = BT_NAN;
correct = maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
test = localScaling.maxDot((btVector3 *)in, size, testDot);
if (test < 0 || test >= size)
{
vlog( "Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
vlog("Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
continue;
}
if (correct != test)
{
vlog("Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
return 1;
}
if( test != position )
if (test != position)
{
vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2],
fp[0] * in[4*position] + fp[1] * in[4*position+1] + fp[2] * in[4*position+2] );
vlog("Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2],
fp[0] * in[4 * position] + fp[1] * in[4 * position + 1] + fp[2] * in[4 * position + 2]);
return 1;
}
if( correctDot != testDot )
if (correctDot != testDot)
{
float relativeError = btFabs((testDot - correctDot) / correctDot);
if (relativeError>1e-6)
if (relativeError > 1e-6)
{
vlog( "Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
return 1;
} else
vlog("Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
return 1;
}
else
{
if (maxRelativeError < relativeError)
{
maxRelativeError = relativeError;
#ifdef VERBOSE_WARNING
sprintf(errStr,"Warning @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2]);
#endif //VERBOSE_WARNING
sprintf(errStr, "Warning @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
#endif //VERBOSE_WARNING
}
}
}
memcpy( biggest, old, 16 );
}
}
memcpy(biggest, old, 16);
}
}
if (maxRelativeError)
{
printf("Warning: relative error = %e\n", maxRelativeError);
@@ -136,146 +128,149 @@ int Test_maxdot(void)
#endif
}
uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)];
uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)];
size_t j, k;
float *in = (float*) data;
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += maxdot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
scalarTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTimes[size] = bestTime;
else
scalarTimes[size] /= 100;
}
uint64_t *timep = &scalarTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += maxdot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
uint64_t scalarTimes[33 + (MAX_LOG2_SIZE - 5)];
uint64_t vectorTimes[33 + (MAX_LOG2_SIZE - 5)];
size_t j, k;
float *in = (float *)data;
for (size = 1; size <= 32; size++)
{
uint64_t startTime, bestTime, currentTime;
timep++;
}
bestTime = -1LL;
scalarTimes[size] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
correct += maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
scalarTimes[size] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTimes[size] = bestTime;
else
scalarTimes[size] /= 100;
}
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.maxDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
vectorTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTimes[size] = bestTime;
else
vectorTimes[size] /= 100;
}
timep = &vectorTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.maxDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
vlog( "Timing:\n" );
vlog( " size\t scalar\t vector\n" );
for( size = 1; size <= 32; size++ )
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT );
size_t index = 33;
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT );
index++;
}
// Useless check to make sure that the timing loops are not optimized away
if( test != correct )
vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test);
GuardFree(data);
return 0;
uint64_t *timep = &scalarTimes[33];
for (size = 64; size <= MAX_SIZE; size *= 2)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
correct += maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
for (size = 1; size <= 32; size++)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTimes[size] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
test += localScaling.maxDot((btVector3 *)in, size, testDot);
currentTime = ReadTicks() - startTime;
vectorTimes[size] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTimes[size] = bestTime;
else
vectorTimes[size] /= 100;
}
timep = &vectorTimes[33];
for (size = 64; size <= MAX_SIZE; size *= 2)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
test += localScaling.maxDot((btVector3 *)in, size, testDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
vlog("Timing:\n");
vlog(" size\t scalar\t vector\n");
for (size = 1; size <= 32; size++)
vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[size]) / LOOPCOUNT, TicksToCycles(vectorTimes[size]) / LOOPCOUNT);
size_t index = 33;
for (size = 64; size <= MAX_SIZE; size *= 2)
{
vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[index]) / LOOPCOUNT, TicksToCycles(vectorTimes[index]) / LOOPCOUNT);
index++;
}
// Useless check to make sure that the timing loops are not optimized away
if (test != correct)
vlog("Error: Test != correct: *%ld vs. %ld\n", correct, test);
GuardFree(data);
return 0;
}
static long maxdot_ref( const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult )
static long maxdot_ref(const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult)
{
const float *dp = (const float*) vertices;
float maxDot = -BT_INFINITY;
long i = 0;
long ptIndex = -1;
for( i = 0; i < count; i++ )
{
float dot = vec[0] * dp[0] + vec[1] * dp[1] + vec[2] * dp[2]; dp += 4;
if( dot > maxDot )
{
maxDot = dot;
ptIndex = i;
}
}
*dotResult = maxDot;
return ptIndex;
const float *dp = (const float *)vertices;
float maxDot = -BT_INFINITY;
long i = 0;
long ptIndex = -1;
for (i = 0; i < count; i++)
{
float dot = vec[0] * dp[0] + vec[1] * dp[1] + vec[2] * dp[2];
dp += 4;
if (dot > maxDot)
{
maxDot = dot;
ptIndex = i;
}
}
*dotResult = maxDot;
return ptIndex;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_maxdot_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_maxdot(void);
int Test_maxdot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_mindot.h"
#include "vector.h"
@@ -20,92 +17,92 @@
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static long mindot_ref( const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult );
static long mindot_ref(const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult);
#ifdef __arm__
#define MAX_LOG2_SIZE 9
#define MAX_LOG2_SIZE 9
#else
#define MAX_LOG2_SIZE 9
#define MAX_LOG2_SIZE 9
#endif
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
#define LOOPCOUNT 100
int Test_mindot(void)
{
// Init an array flanked by guard pages
btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL );
float *fp = (float*) data;
long correct, test;
btVector3 localScaling( 0.1f, 0.2f, 0.3f);
size_t size;
// Init the data
size_t i;
for( i = 0; i < MAX_SIZE; i++ )
{
fp[4*i] = (int32_t) RANDF_16;
fp[4*i+1] = (int32_t) RANDF_16;
fp[4*i+2] = (int32_t) RANDF_16;
fp[4*i+3] = BT_NAN; // w channel NaN
}
float correctDot, testDot;
fp = (float*) localScaling;
// Init an array flanked by guard pages
btSimdFloat4 *data = (btSimdFloat4 *)GuardCalloc(1, MAX_SIZE * sizeof(btSimdFloat4), NULL);
float *fp = (float *)data;
long correct, test;
btVector3 localScaling(0.1f, 0.2f, 0.3f);
size_t size;
// Init the data
size_t i;
for (i = 0; i < MAX_SIZE; i++)
{
fp[4 * i] = (int32_t)RANDF_16;
fp[4 * i + 1] = (int32_t)RANDF_16;
fp[4 * i + 2] = (int32_t)RANDF_16;
fp[4 * i + 3] = BT_NAN; // w channel NaN
}
float correctDot, testDot;
fp = (float *)localScaling;
float maxRelativeError = 0.f;
for( size = 1; size <= MAX_SIZE; size++ )
{
float *in = (float*)(data + MAX_SIZE - size);
size_t position;
for( position = 0; position < size; position++ )
{
float *biggest = in + position * 4;
float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] };
biggest[0] -= LARGE_FLOAT17;
biggest[1] -= LARGE_FLOAT17;
biggest[2] -= LARGE_FLOAT17;
biggest[3] -= LARGE_FLOAT17;
correctDot = BT_NAN;
testDot = BT_NAN;
correct = mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
test = localScaling.minDot( (btVector3*) in, size, testDot);
if( test < 0 || test >= size )
{
vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
continue;
}
if( correct != test )
for (size = 1; size <= MAX_SIZE; size++)
{
float *in = (float *)(data + MAX_SIZE - size);
size_t position;
for (position = 0; position < size; position++)
{
float *biggest = in + position * 4;
float old[4] = {biggest[0], biggest[1], biggest[2], biggest[3]};
biggest[0] -= LARGE_FLOAT17;
biggest[1] -= LARGE_FLOAT17;
biggest[2] -= LARGE_FLOAT17;
biggest[3] -= LARGE_FLOAT17;
correctDot = BT_NAN;
testDot = BT_NAN;
correct = mindot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
test = localScaling.minDot((btVector3 *)in, size, testDot);
if (test < 0 || test >= size)
{
vlog( "Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
vlog("Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
continue;
}
if (correct != test)
{
vlog("Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
return 1;
}
if( test != position )
if (test != position)
{
vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2],
fp[0] * in[4*position] + fp[1] * in[4*position+1] + fp[2] * in[4*position+2] );
vlog("Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2],
fp[0] * in[4 * position] + fp[1] * in[4 * position + 1] + fp[2] * in[4 * position + 2]);
return 1;
}
if( correctDot != testDot )
if (correctDot != testDot)
{
float relativeError = btFabs((testDot - correctDot) / correctDot);
if (relativeError>1e6)
if (relativeError > 1e6)
{
vlog( "Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
vlog("Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
return 1;
} else
}
else
{
if (maxRelativeError < relativeError)
{
@@ -114,156 +111,157 @@ int Test_mindot(void)
}
}
memcpy( biggest, old, 16 );
}
}
memcpy(biggest, old, 16);
}
}
if (maxRelativeError)
{
printf("Warning: relative error = %e\n", maxRelativeError);
}
uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)];
uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)];
size_t j, k;
float *in = (float*) data;
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
scalarTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTimes[size] = bestTime;
else
scalarTimes[size] /= 100;
}
uint64_t *timep = &scalarTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.minDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
vectorTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTimes[size] = bestTime;
else
vectorTimes[size] /= 100;
}
timep = &vectorTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.minDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
vlog( "Timing:\n" );
vlog( " size\t scalar\t vector\n" );
for( size = 1; size <= 32; size++ )
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT );
size_t index = 33;
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT );
index++;
}
// Useless check to make sure that the timing loops are not optimized away
if( test != correct )
vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test);
GuardFree(data);
return 0;
uint64_t scalarTimes[33 + (MAX_LOG2_SIZE - 5)];
uint64_t vectorTimes[33 + (MAX_LOG2_SIZE - 5)];
size_t j, k;
float *in = (float *)data;
for (size = 1; size <= 32; size++)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTimes[size] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
correct += mindot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
scalarTimes[size] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTimes[size] = bestTime;
else
scalarTimes[size] /= 100;
}
uint64_t *timep = &scalarTimes[33];
for (size = 64; size <= MAX_SIZE; size *= 2)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
correct += mindot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
for (size = 1; size <= 32; size++)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTimes[size] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
test += localScaling.minDot((btVector3 *)in, size, testDot);
currentTime = ReadTicks() - startTime;
vectorTimes[size] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTimes[size] = bestTime;
else
vectorTimes[size] /= 100;
}
timep = &vectorTimes[33];
for (size = 64; size <= MAX_SIZE; size *= 2)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
test += localScaling.minDot((btVector3 *)in, size, testDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
vlog("Timing:\n");
vlog(" size\t scalar\t vector\n");
for (size = 1; size <= 32; size++)
vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[size]) / LOOPCOUNT, TicksToCycles(vectorTimes[size]) / LOOPCOUNT);
size_t index = 33;
for (size = 64; size <= MAX_SIZE; size *= 2)
{
vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[index]) / LOOPCOUNT, TicksToCycles(vectorTimes[index]) / LOOPCOUNT);
index++;
}
// Useless check to make sure that the timing loops are not optimized away
if (test != correct)
vlog("Error: Test != correct: *%ld vs. %ld\n", correct, test);
GuardFree(data);
return 0;
}
static long mindot_ref( const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult )
static long mindot_ref(const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult)
{
const float *dp = (const float*) vertices;
float minDot = BT_INFINITY;
long i = 0;
long ptIndex = -1;
for( i = 0; i < count; i++ )
{
float dot = vec[0] * dp[0] + vec[1] * dp[1] + vec[2] * dp[2]; dp += 4;
if( dot < minDot )
{
minDot = dot;
ptIndex = i;
}
}
*dotResult = minDot;
return ptIndex;
const float *dp = (const float *)vertices;
float minDot = BT_INFINITY;
long i = 0;
long ptIndex = -1;
for (i = 0; i < count; i++)
{
float dot = vec[0] * dp[0] + vec[1] * dp[1] + vec[2] * dp[2];
dp += 4;
if (dot < minDot)
{
minDot = dot;
ptIndex = i;
}
}
*dotResult = minDot;
return ptIndex;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_mindot_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_mindot(void);
int Test_mindot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_qtdot.h"
#include "vector.h"
@@ -19,17 +17,16 @@
#include <LinearMath/btQuaternion.h>
#define BT_OP(a, b) (a.dot(b))
#define BT_OP(a, b) (a.dot(b))
// reference code for testing purposes
static inline btScalar qtdot_ref(btQuaternion& q1, btQuaternion& q2);
static inline btScalar qtdot_ref(btQuaternion& q1, btQuaternion& q2)
{
return
q1.x() * q2.x() +
q1.y() * q2.y() +
q1.z() * q2.z() +
q1.w() * q2.w();
return q1.x() * q2.x() +
q1.y() * q2.y() +
q1.z() * q2.z() +
q1.w() * q2.w();
}
#define LOOPCOUNT 1024
@@ -37,126 +34,134 @@ static inline btScalar qtdot_ref(btQuaternion& q1, btQuaternion& q2)
int Test_qtdot(void)
{
btQuaternion q1, q2;
btQuaternion q1, q2;
float x, y, z, w, vNaN;
vNaN = BT_NAN; // w channel NaN
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q1.setValue(x,y,z,w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q2.setValue(x,y,z,w);
vNaN = BT_NAN; // w channel NaN
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q1.setValue(x, y, z, w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q2.setValue(x, y, z, w);
btScalar correct_res, test_res;
{
correct_res = vNaN;
{
correct_res = vNaN;
test_res = vNaN;
correct_res = qtdot_ref(q1, q2);
test_res = BT_OP(q1,q2);
if( fabsf(correct_res - test_res) > FLT_EPSILON*4 )
{
vlog( "Error - qtdot result error! "
"\ncorrect = %10.4f "
"\ntested = %10.4f \n",
correct_res, test_res);
test_res = BT_OP(q1, q2);
if (fabsf(correct_res - test_res) > FLT_EPSILON * 4)
{
vlog(
"Error - qtdot result error! "
"\ncorrect = %10.4f "
"\ntested = %10.4f \n",
correct_res, test_res);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arr1[DATA_SIZE];
btQuaternion qt_arr2[DATA_SIZE];
btScalar res_arr[DATA_SIZE];
btScalar res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x, y, z, w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr2[k].setValue(x,y,z,w);
}
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr2[k].setValue(x, y, z, w);
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t km = (k & (DATA_SIZE-1));
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
size_t km = (k & (DATA_SIZE - 1));
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t km = (k & (DATA_SIZE-1));
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
size_t km = (k & (DATA_SIZE - 1));
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);
km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);
km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);
km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);
km++;
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_qtdot_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_qtdot(void);
int Test_qtdot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_qtmul.h"
#include "vector.h"
@@ -20,19 +17,19 @@
#include <LinearMath/btQuaternion.h>
#define BT_OP(a, b) ((a) *= (b))
#define BT_OP(a, b) ((a) *= (b))
// reference code for testing purposes
static inline btQuaternion& qtmul_ref(btQuaternion& q1, btQuaternion& q2);
static inline btQuaternion& qtmul_ref(btQuaternion& q1, btQuaternion& q2)
{
float x,y,z,w;
x = q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
y = q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
z = q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
w = q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z();
float x, y, z, w;
x = q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
y = q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
z = q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
w = q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z();
q1.setValue(x, y, z, w);
q1.setValue(x, y, z, w);
return q1;
}
@@ -41,143 +38,145 @@ static inline btQuaternion& qtmul_ref(btQuaternion& q1, btQuaternion& q2)
int Test_qtmul(void)
{
btQuaternion q1, q2, q3;
float x, y, z, w, vNaN;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
vNaN = BT_NAN; // w channel NaN
q1.setValue(x,y,z,w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q2.setValue(x,y,z,w);
btQuaternion q1, q2, q3;
float x, y, z, w, vNaN;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
vNaN = BT_NAN; // w channel NaN
q1.setValue(x, y, z, w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q2.setValue(x, y, z, w);
q3 = q1;
btQuaternion correct_res, test_res;
{
btQuaternion correct_res, test_res;
{
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN, vNaN);
correct_res.setValue(vNaN, vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN, vNaN);
correct_res = qtmul_ref(q1, q2);
test_res = BT_OP(q3,q2);
if( fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) > FLT_EPSILON*10 )
{
vlog( "Error - qtmul result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
test_res = BT_OP(q3, q2);
if (fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) >
FLT_EPSILON * 10)
{
vlog(
"Error - qtmul result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arr1[DATA_SIZE];
btQuaternion qt_arr2[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
uint64_t startTime, bestTime, currentTime;
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr2[k].setValue(x,y,z,w);
qt_arr1[k].setValue(x, y, z, w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr2[k].setValue(x, y, z, w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
qt_arr1[k] = qtmul_ref(qt_arr1[k], qt_arr2[k]);
qt_arr1[k] = qtmul_ref(qt_arr1[k], qt_arr2[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr2[k].setValue(x,y,z,w);
qt_arr1[k].setValue(x, y, z, w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr2[k].setValue(x, y, z, w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
qt_arr1[k] = BT_OP(qt_arr1[k], qt_arr2[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_qtmul_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_qtmul(void);
int Test_qtmul(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_qtmulQV3.h"
#include "vector.h"
@@ -19,17 +17,17 @@
#include <LinearMath/btQuaternion.h>
#define BT_OP(a, b) ((a) * (b))
#define BT_OP(a, b) ((a) * (b))
// reference code for testing purposes
static inline btQuaternion qtmulQV3_ref(const btQuaternion& q, const btVector3& w);
static inline btQuaternion qtmulQV3_ref(const btQuaternion& q, const btVector3& w)
{
return btQuaternion(
q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
-q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
return btQuaternion(
q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
-q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
}
#define LOOPCOUNT 1024
@@ -37,126 +35,128 @@ static inline btQuaternion qtmulQV3_ref(const btQuaternion& q, const btVector3&
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN ); // w channel NaN
return btAssign128(RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN); // w channel NaN
}
static inline btSimdFloat4 qtrand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, RANDF_m1p1 );
return btAssign128(RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, RANDF_m1p1);
}
static inline btSimdFloat4 qtNAN_f4(void)
{
return btAssign128( BT_NAN, BT_NAN, BT_NAN, BT_NAN );
return btAssign128(BT_NAN, BT_NAN, BT_NAN, BT_NAN);
}
int Test_qtmulQV3(void)
{
btQuaternion q;
btQuaternion q;
btVector3 v3;
// Init the data
q = btQuaternion(qtrand_f4());
v3 = btVector3(rand_f4());
btQuaternion correct_res, test_res;
correct_res = btQuaternion(qtNAN_f4());
test_res = btQuaternion(qtNAN_f4());
{
// Init the data
q = btQuaternion(qtrand_f4());
v3 = btVector3(rand_f4());
btQuaternion correct_res, test_res;
correct_res = btQuaternion(qtNAN_f4());
test_res = btQuaternion(qtNAN_f4());
{
correct_res = qtmulQV3_ref(q, v3);
test_res = BT_OP(q, v3);
if( fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) > FLT_EPSILON*8 )
{
vlog( "Error - qtmulQV3 result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
if (fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) >
FLT_EPSILON * 8)
{
vlog(
"Error - qtmulQV3 result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arrR[DATA_SIZE];
btQuaternion qt_arr[DATA_SIZE];
btVector3 v3_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
qt_arrR[k] = qtmulQV3_ref(qt_arr[k], v3_arr[k]);
qt_arrR[k] = qtmulQV3_ref(qt_arr[k], v3_arr[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
qt_arrR[k] = BT_OP(qt_arr[k], v3_arr[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_qtmulQV3_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_qtmulQV3(void);
int Test_qtmulQV3(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,9 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_qtmulV3Q.h"
#include "vector.h"
@@ -18,17 +17,17 @@
#include <LinearMath/btQuaternion.h>
#define BT_OP(a, b) ((a) * (b))
#define BT_OP(a, b) ((a) * (b))
// reference code for testing purposes
static inline btQuaternion qtmulV3Q_ref(const btVector3& w, const btQuaternion& q);
static inline btQuaternion qtmulV3Q_ref(const btVector3& w, const btQuaternion& q)
{
return btQuaternion(
+w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
return btQuaternion(
+w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
+w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
+w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
-w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
-w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
}
#define LOOPCOUNT 1024
@@ -36,126 +35,128 @@ static inline btQuaternion qtmulV3Q_ref(const btVector3& w, const btQuaternion&
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN ); // w channel NaN
return btAssign128(RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN); // w channel NaN
}
static inline btSimdFloat4 qtrand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, RANDF_m1p1 );
return btAssign128(RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, RANDF_m1p1);
}
static inline btSimdFloat4 qtNAN_f4(void)
{
return btAssign128( BT_NAN, BT_NAN, BT_NAN, BT_NAN );
return btAssign128(BT_NAN, BT_NAN, BT_NAN, BT_NAN);
}
int Test_qtmulV3Q(void)
{
btQuaternion q;
btQuaternion q;
btVector3 v3;
// Init the data
q = btQuaternion(qtrand_f4());
v3 = btVector3(rand_f4());
btQuaternion correct_res, test_res;
correct_res = btQuaternion(qtNAN_f4());
test_res = btQuaternion(qtNAN_f4());
{
// Init the data
q = btQuaternion(qtrand_f4());
v3 = btVector3(rand_f4());
btQuaternion correct_res, test_res;
correct_res = btQuaternion(qtNAN_f4());
test_res = btQuaternion(qtNAN_f4());
{
correct_res = qtmulV3Q_ref(v3, q);
test_res = BT_OP(v3, q);
if( fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) > FLT_EPSILON*8 )
{
vlog( "Error - qtmulV3Q result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
if (fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) >
FLT_EPSILON * 8)
{
vlog(
"Error - qtmulV3Q result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arrR[DATA_SIZE];
btQuaternion qt_arr[DATA_SIZE];
btVector3 v3_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
qt_arrR[k] = qtmulV3Q_ref(v3_arr[k], qt_arr[k]);
qt_arrR[k] = qtmulV3Q_ref(v3_arr[k], qt_arr[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
qt_arrR[k] = BT_OP(v3_arr[k], qt_arr[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
#endif//#ifdef BT_USE_SSE
#endif //#ifdef BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_qtmulV3Q_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_qtmulV3Q(void);
int Test_qtmulV3Q(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_qtnorm.h"
#include "vector.h"
@@ -20,23 +17,23 @@
#include <LinearMath/btQuaternion.h>
#define BT_OP(a) (a.normalize())
#define BT_OP(a) (a.normalize())
// reference code for testing purposes
static inline btQuaternion& qtnorm_ref(btQuaternion& q1);
static inline btQuaternion& qtnorm_ref(btQuaternion& q1)
{
float dot =
q1.x() * q1.x() +
q1.y() * q1.y() +
q1.z() * q1.z() +
q1.w() * q1.w();
float dot =
q1.x() * q1.x() +
q1.y() * q1.y() +
q1.z() * q1.z() +
q1.w() * q1.w();
dot = 1.0f / sqrtf(dot);
q1.setValue(q1.x()*dot, q1.y()*dot, q1.z()*dot, q1.w()*dot);
q1.setValue(q1.x() * dot, q1.y() * dot, q1.z() * dot, q1.w() * dot);
return q1;
return q1;
}
#define LOOPCOUNT 1024
@@ -44,133 +41,142 @@ static inline btQuaternion& qtnorm_ref(btQuaternion& q1)
int Test_qtnorm(void)
{
int i;
btQuaternion q1, q2;
int i;
btQuaternion q1, q2;
float x, y, z, w, vNaN;
vNaN = BT_NAN; // w channel NaN
btQuaternion correct_res, test_res;
for (i=0; i<LOOPCOUNT; i++)
{
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q1.setValue(x,y,z,w);
q2 = q1;
vNaN = BT_NAN; // w channel NaN
correct_res.setValue(vNaN, vNaN, vNaN, vNaN);
btQuaternion correct_res, test_res;
for (i = 0; i < LOOPCOUNT; i++)
{
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q1.setValue(x, y, z, w);
q2 = q1;
correct_res.setValue(vNaN, vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN, vNaN);
correct_res = qtnorm_ref(q1);
test_res = BT_OP(q2);
if( fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) > FLT_EPSILON*10 )
{
vlog( "Error - qtnorm result error! "
"\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
"\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
if (fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) >
FLT_EPSILON * 10)
{
vlog(
"Error - qtnorm result error! "
"\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
"\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arr0[DATA_SIZE];
btQuaternion qt_arr1[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
}
uint64_t startTime, bestTime, currentTime;
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for (k = 0; k < DATA_SIZE; k++)
{
size_t km = (k & (DATA_SIZE-1));
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);km++;
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);km++;
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);km++;
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x, y, z, w);
}
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t km = (k & (DATA_SIZE - 1));
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);
km++;
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);
km++;
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);
km++;
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
}
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
for (k = 0; k < DATA_SIZE; k++)
{
size_t km = (k & (DATA_SIZE-1));
qt_arr0[km] = BT_OP(qt_arr1[km]);km++;
qt_arr0[km] = BT_OP(qt_arr1[km]);km++;
qt_arr0[km] = BT_OP(qt_arr1[km]);km++;
qt_arr0[km] = BT_OP(qt_arr1[km]);km++;
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x, y, z, w);
}
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t km = (k & (DATA_SIZE - 1));
qt_arr0[km] = BT_OP(qt_arr1[km]);
km++;
qt_arr0[km] = BT_OP(qt_arr1[km]);
km++;
qt_arr0[km] = BT_OP(qt_arr1[km]);
km++;
qt_arr0[km] = BT_OP(qt_arr1[km]);
km++;
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_qtnorm_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_qtnorm(void);
int Test_qtnorm(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,9 +5,8 @@
// Copyright (c) 2011 Apple Inc., Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_quat_aos_neon.h"
#include "vector.h"
@@ -16,584 +15,574 @@
#include "../vectormath/vmInclude.h"
//typedef Vectormath::Aos::Vector3 vmVector3;
//typedef Vectormath::Aos::Quat vmQuat;
//typedef Vectormath::Aos::Matrix3 vmMatrix3;
//typedef Vectormath::Aos::Transform3 vmTransform3;
//typedef Vectormath::Aos::Point3 vmPoint3;
typedef Vectormath::Aos::Vector4 vmVector4;
typedef Vectormath::Aos::Vector4 vmVector4;
// reference code for testing purposes
ATTRIBUTE_ALIGNED16(class) Quat_ref
ATTRIBUTE_ALIGNED16(class)
Quat_ref
{
float mX;
float mY;
float mZ;
float mW;
float mX;
float mY;
float mZ;
float mW;
public:
// Default constructor; does no initialization
//
inline Quat_ref( ) { };
// Copy a quaternion
//
inline Quat_ref( const Quat_ref & quat );
// Construct a quaternion from x, y, z, and w elements
//
inline Quat_ref( float x, float y, float z, float w );
// Construct a quaternion from a 3-D vector and a scalar
//
inline Quat_ref( const vmVector3 & xyz, float w );
// Copy elements from a 4-D vector into a quaternion
//
explicit inline Quat_ref( const vmVector4 & vec );
// Convert a rotation matrix to a unit-length quaternion
//
explicit inline Quat_ref( const vmMatrix3 & rotMat );
// Set all elements of a quaternion to the same scalar value
//
explicit inline Quat_ref( float scalar );
// Assign one quaternion to another
//
inline Quat_ref & operator =( const Quat_ref & quat );
// Set the x, y, and z elements of a quaternion
// NOTE:
// This function does not change the w element.
//
inline Quat_ref & setXYZ( const vmVector3 & vec );
// Get the x, y, and z elements of a quaternion
//
inline const vmVector3 getXYZ( ) const;
// Set the x element of a quaternion
//
inline Quat_ref & setX( float x );
// Set the y element of a quaternion
//
inline Quat_ref & setY( float y );
// Set the z element of a quaternion
//
inline Quat_ref & setZ( float z );
// Set the w element of a quaternion
//
inline Quat_ref & setW( float w );
// Get the x element of a quaternion
//
inline float getX( ) const;
// Get the y element of a quaternion
//
inline float getY( ) const;
// Get the z element of a quaternion
//
inline float getZ( ) const;
// Get the w element of a quaternion
//
inline float getW( ) const;
// Set an x, y, z, or w element of a quaternion by index
//
inline Quat_ref & setElem( int idx, float value );
// Get an x, y, z, or w element of a quaternion by index
//
inline float getElem( int idx ) const;
// Subscripting operator to set or get an element
//
inline float & operator []( int idx );
// Subscripting operator to get an element
//
inline float operator []( int idx ) const;
// Add two quaternions
//
inline const Quat_ref operator +( const Quat_ref & quat ) const;
// Subtract a quaternion from another quaternion
//
inline const Quat_ref operator -( const Quat_ref & quat ) const;
// Multiply two quaternions
//
inline const Quat_ref operator *( const Quat_ref & quat ) const;
// Multiply a quaternion by a scalar
//
inline const Quat_ref operator *( float scalar ) const;
// Divide a quaternion by a scalar
//
inline const Quat_ref operator /( float scalar ) const;
// Perform compound assignment and addition with a quaternion
//
inline Quat_ref & operator +=( const Quat_ref & quat );
// Perform compound assignment and subtraction by a quaternion
//
inline Quat_ref & operator -=( const Quat_ref & quat );
// Perform compound assignment and multiplication by a quaternion
//
inline Quat_ref & operator *=( const Quat_ref & quat );
// Perform compound assignment and multiplication by a scalar
//
inline Quat_ref & operator *=( float scalar );
// Perform compound assignment and division by a scalar
//
inline Quat_ref & operator /=( float scalar );
// Negate all elements of a quaternion
//
inline const Quat_ref operator -( ) const;
// Construct an identity quaternion
//
static inline const Quat_ref identity( );
// Construct a quaternion to rotate between two unit-length 3-D vectors
// NOTE:
// The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
//
static inline const Quat_ref rotation( const vmVector3 & unitVec0, const vmVector3 & unitVec1 );
// Construct a quaternion to rotate around a unit-length 3-D vector
//
static inline const Quat_ref rotation( float radians, const vmVector3 & unitVec );
// Construct a quaternion to rotate around the x axis
//
static inline const Quat_ref rotationX( float radians );
// Construct a quaternion to rotate around the y axis
//
static inline const Quat_ref rotationY( float radians );
// Construct a quaternion to rotate around the z axis
//
static inline const Quat_ref rotationZ( float radians );
// Default constructor; does no initialization
//
inline Quat_ref(){};
// Copy a quaternion
//
inline Quat_ref(const Quat_ref &quat);
// Construct a quaternion from x, y, z, and w elements
//
inline Quat_ref(float x, float y, float z, float w);
// Construct a quaternion from a 3-D vector and a scalar
//
inline Quat_ref(const vmVector3 &xyz, float w);
// Copy elements from a 4-D vector into a quaternion
//
explicit inline Quat_ref(const vmVector4 &vec);
// Convert a rotation matrix to a unit-length quaternion
//
explicit inline Quat_ref(const vmMatrix3 &rotMat);
// Set all elements of a quaternion to the same scalar value
//
explicit inline Quat_ref(float scalar);
// Assign one quaternion to another
//
inline Quat_ref &operator=(const Quat_ref &quat);
// Set the x, y, and z elements of a quaternion
// NOTE:
// This function does not change the w element.
//
inline Quat_ref &setXYZ(const vmVector3 &vec);
// Get the x, y, and z elements of a quaternion
//
inline const vmVector3 getXYZ() const;
// Set the x element of a quaternion
//
inline Quat_ref &setX(float x);
// Set the y element of a quaternion
//
inline Quat_ref &setY(float y);
// Set the z element of a quaternion
//
inline Quat_ref &setZ(float z);
// Set the w element of a quaternion
//
inline Quat_ref &setW(float w);
// Get the x element of a quaternion
//
inline float getX() const;
// Get the y element of a quaternion
//
inline float getY() const;
// Get the z element of a quaternion
//
inline float getZ() const;
// Get the w element of a quaternion
//
inline float getW() const;
// Set an x, y, z, or w element of a quaternion by index
//
inline Quat_ref &setElem(int idx, float value);
// Get an x, y, z, or w element of a quaternion by index
//
inline float getElem(int idx) const;
// Subscripting operator to set or get an element
//
inline float &operator[](int idx);
// Subscripting operator to get an element
//
inline float operator[](int idx) const;
// Add two quaternions
//
inline const Quat_ref operator+(const Quat_ref &quat) const;
// Subtract a quaternion from another quaternion
//
inline const Quat_ref operator-(const Quat_ref &quat) const;
// Multiply two quaternions
//
inline const Quat_ref operator*(const Quat_ref &quat) const;
// Multiply a quaternion by a scalar
//
inline const Quat_ref operator*(float scalar) const;
// Divide a quaternion by a scalar
//
inline const Quat_ref operator/(float scalar) const;
// Perform compound assignment and addition with a quaternion
//
inline Quat_ref &operator+=(const Quat_ref &quat);
// Perform compound assignment and subtraction by a quaternion
//
inline Quat_ref &operator-=(const Quat_ref &quat);
// Perform compound assignment and multiplication by a quaternion
//
inline Quat_ref &operator*=(const Quat_ref &quat);
// Perform compound assignment and multiplication by a scalar
//
inline Quat_ref &operator*=(float scalar);
// Perform compound assignment and division by a scalar
//
inline Quat_ref &operator/=(float scalar);
// Negate all elements of a quaternion
//
inline const Quat_ref operator-() const;
// Construct an identity quaternion
//
static inline const Quat_ref identity();
// Construct a quaternion to rotate between two unit-length 3-D vectors
// NOTE:
// The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
//
static inline const Quat_ref rotation(const vmVector3 &unitVec0, const vmVector3 &unitVec1);
// Construct a quaternion to rotate around a unit-length 3-D vector
//
static inline const Quat_ref rotation(float radians, const vmVector3 &unitVec);
// Construct a quaternion to rotate around the x axis
//
static inline const Quat_ref rotationX(float radians);
// Construct a quaternion to rotate around the y axis
//
static inline const Quat_ref rotationY(float radians);
// Construct a quaternion to rotate around the z axis
//
static inline const Quat_ref rotationZ(float radians);
};
inline Quat_ref::Quat_ref( const Quat_ref & quat )
inline Quat_ref::Quat_ref(const Quat_ref &quat)
{
mX = quat.mX;
mY = quat.mY;
mZ = quat.mZ;
mW = quat.mW;
mX = quat.mX;
mY = quat.mY;
mZ = quat.mZ;
mW = quat.mW;
}
inline Quat_ref::Quat_ref( float _x, float _y, float _z, float _w )
inline Quat_ref::Quat_ref(float _x, float _y, float _z, float _w)
{
mX = _x;
mY = _y;
mZ = _z;
mW = _w;
mX = _x;
mY = _y;
mZ = _z;
mW = _w;
}
inline Quat_ref::Quat_ref( const vmVector3 & xyz, float _w )
inline Quat_ref::Quat_ref(const vmVector3 &xyz, float _w)
{
this->setXYZ( xyz );
this->setW( _w );
this->setXYZ(xyz);
this->setW(_w);
}
inline Quat_ref::Quat_ref( const vmVector4 & vec )
inline Quat_ref::Quat_ref(const vmVector4 &vec)
{
mX = vec.getX();
mY = vec.getY();
mZ = vec.getZ();
mW = vec.getW();
mX = vec.getX();
mY = vec.getY();
mZ = vec.getZ();
mW = vec.getW();
}
inline Quat_ref::Quat_ref( float scalar )
inline Quat_ref::Quat_ref(float scalar)
{
mX = scalar;
mY = scalar;
mZ = scalar;
mW = scalar;
mX = scalar;
mY = scalar;
mZ = scalar;
mW = scalar;
}
inline const Quat_ref Quat_ref::identity( )
inline const Quat_ref Quat_ref::identity()
{
return Quat_ref( 0.0f, 0.0f, 0.0f, 1.0f );
return Quat_ref(0.0f, 0.0f, 0.0f, 1.0f);
}
inline void loadXYZW_ref( Quat_ref & quat, const float * fptr )
inline void loadXYZW_ref(Quat_ref &quat, const float *fptr)
{
quat = Quat_ref( fptr[0], fptr[1], fptr[2], fptr[3] );
quat = Quat_ref(fptr[0], fptr[1], fptr[2], fptr[3]);
}
inline void storeXYZW_ref( const Quat_ref & quat, float * fptr )
inline void storeXYZW_ref(const Quat_ref &quat, float *fptr)
{
fptr[0] = quat.getX();
fptr[1] = quat.getY();
fptr[2] = quat.getZ();
fptr[3] = quat.getW();
fptr[0] = quat.getX();
fptr[1] = quat.getY();
fptr[2] = quat.getZ();
fptr[3] = quat.getW();
}
inline Quat_ref & Quat_ref::operator =( const Quat_ref & quat )
inline Quat_ref &Quat_ref::operator=(const Quat_ref &quat)
{
mX = quat.mX;
mY = quat.mY;
mZ = quat.mZ;
mW = quat.mW;
return *this;
mX = quat.mX;
mY = quat.mY;
mZ = quat.mZ;
mW = quat.mW;
return *this;
}
inline Quat_ref & Quat_ref::setXYZ( const vmVector3 & vec )
inline Quat_ref &Quat_ref::setXYZ(const vmVector3 &vec)
{
mX = vec.getX();
mY = vec.getY();
mZ = vec.getZ();
return *this;
mX = vec.getX();
mY = vec.getY();
mZ = vec.getZ();
return *this;
}
inline const vmVector3 Quat_ref::getXYZ( ) const
inline const vmVector3 Quat_ref::getXYZ() const
{
return vmVector3( mX, mY, mZ );
return vmVector3(mX, mY, mZ);
}
inline Quat_ref & Quat_ref::setX( float _x )
inline Quat_ref &Quat_ref::setX(float _x)
{
mX = _x;
return *this;
mX = _x;
return *this;
}
inline float Quat_ref::getX( ) const
inline float Quat_ref::getX() const
{
return mX;
return mX;
}
inline Quat_ref & Quat_ref::setY( float _y )
inline Quat_ref &Quat_ref::setY(float _y)
{
mY = _y;
return *this;
mY = _y;
return *this;
}
inline float Quat_ref::getY( ) const
inline float Quat_ref::getY() const
{
return mY;
return mY;
}
inline Quat_ref & Quat_ref::setZ( float _z )
inline Quat_ref &Quat_ref::setZ(float _z)
{
mZ = _z;
return *this;
mZ = _z;
return *this;
}
inline float Quat_ref::getZ( ) const
inline float Quat_ref::getZ() const
{
return mZ;
return mZ;
}
inline Quat_ref & Quat_ref::setW( float _w )
inline Quat_ref &Quat_ref::setW(float _w)
{
mW = _w;
return *this;
mW = _w;
return *this;
}
inline float Quat_ref::getW( ) const
inline float Quat_ref::getW() const
{
return mW;
return mW;
}
inline Quat_ref & Quat_ref::setElem( int idx, float value )
inline Quat_ref &Quat_ref::setElem(int idx, float value)
{
*(&mX + idx) = value;
return *this;
*(&mX + idx) = value;
return *this;
}
inline float Quat_ref::getElem( int idx ) const
inline float Quat_ref::getElem(int idx) const
{
return *(&mX + idx);
return *(&mX + idx);
}
inline float & Quat_ref::operator []( int idx )
inline float &Quat_ref::operator[](int idx)
{
return *(&mX + idx);
return *(&mX + idx);
}
inline float Quat_ref::operator []( int idx ) const
inline float Quat_ref::operator[](int idx) const
{
return *(&mX + idx);
return *(&mX + idx);
}
inline const Quat_ref Quat_ref::operator +( const Quat_ref & quat ) const
inline const Quat_ref Quat_ref::operator+(const Quat_ref &quat) const
{
return Quat_ref(
( mX + quat.mX ),
( mY + quat.mY ),
( mZ + quat.mZ ),
( mW + quat.mW )
);
return Quat_ref(
(mX + quat.mX),
(mY + quat.mY),
(mZ + quat.mZ),
(mW + quat.mW));
}
inline const Quat_ref Quat_ref::operator -( const Quat_ref & quat ) const
inline const Quat_ref Quat_ref::operator-(const Quat_ref &quat) const
{
return Quat_ref(
( mX - quat.mX ),
( mY - quat.mY ),
( mZ - quat.mZ ),
( mW - quat.mW )
);
return Quat_ref(
(mX - quat.mX),
(mY - quat.mY),
(mZ - quat.mZ),
(mW - quat.mW));
}
inline const Quat_ref Quat_ref::operator *( float scalar ) const
inline const Quat_ref Quat_ref::operator*(float scalar) const
{
return Quat_ref(
( mX * scalar ),
( mY * scalar ),
( mZ * scalar ),
( mW * scalar )
);
return Quat_ref(
(mX * scalar),
(mY * scalar),
(mZ * scalar),
(mW * scalar));
}
inline Quat_ref & Quat_ref::operator +=( const Quat_ref & quat )
inline Quat_ref &Quat_ref::operator+=(const Quat_ref &quat)
{
*this = *this + quat;
return *this;
*this = *this + quat;
return *this;
}
inline Quat_ref & Quat_ref::operator -=( const Quat_ref & quat )
inline Quat_ref &Quat_ref::operator-=(const Quat_ref &quat)
{
*this = *this - quat;
return *this;
*this = *this - quat;
return *this;
}
inline Quat_ref & Quat_ref::operator *=( float scalar )
inline Quat_ref &Quat_ref::operator*=(float scalar)
{
*this = *this * scalar;
return *this;
*this = *this * scalar;
return *this;
}
inline const Quat_ref Quat_ref::operator /( float scalar ) const
inline const Quat_ref Quat_ref::operator/(float scalar) const
{
return Quat_ref(
( mX / scalar ),
( mY / scalar ),
( mZ / scalar ),
( mW / scalar )
);
return Quat_ref(
(mX / scalar),
(mY / scalar),
(mZ / scalar),
(mW / scalar));
}
inline Quat_ref & Quat_ref::operator /=( float scalar )
inline Quat_ref &Quat_ref::operator/=(float scalar)
{
*this = *this / scalar;
return *this;
*this = *this / scalar;
return *this;
}
inline const Quat_ref Quat_ref::operator -( ) const
inline const Quat_ref Quat_ref::operator-() const
{
return Quat_ref(
-mX,
-mY,
-mZ,
-mW
);
return Quat_ref(
-mX,
-mY,
-mZ,
-mW);
}
inline const Quat_ref operator *( float scalar, const Quat_ref & quat )
inline const Quat_ref operator*(float scalar, const Quat_ref &quat)
{
return quat * scalar;
return quat * scalar;
}
inline float dot( const Quat_ref & quat0, const Quat_ref & quat1 )
inline float dot(const Quat_ref &quat0, const Quat_ref &quat1)
{
float result;
result = ( quat0.getX() * quat1.getX() );
result = ( result + ( quat0.getY() * quat1.getY() ) );
result = ( result + ( quat0.getZ() * quat1.getZ() ) );
result = ( result + ( quat0.getW() * quat1.getW() ) );
return result;
float result;
result = (quat0.getX() * quat1.getX());
result = (result + (quat0.getY() * quat1.getY()));
result = (result + (quat0.getZ() * quat1.getZ()));
result = (result + (quat0.getW() * quat1.getW()));
return result;
}
inline const Quat_ref lerp( float t, const Quat_ref & quat0, const Quat_ref & quat1 )
inline const Quat_ref lerp(float t, const Quat_ref &quat0, const Quat_ref &quat1)
{
return ( quat0 + ( ( quat1 - quat0 ) * t ) );
return (quat0 + ((quat1 - quat0) * t));
}
inline const Quat_ref slerp( float t, const Quat_ref & unitQuat0, const Quat_ref & unitQuat1 )
inline const Quat_ref slerp(float t, const Quat_ref &unitQuat0, const Quat_ref &unitQuat1)
{
Quat_ref start;
float recipSinAngle, scale0, scale1, cosAngle, angle;
cosAngle = dot( unitQuat0, unitQuat1 );
if ( cosAngle < 0.0f ) {
cosAngle = -cosAngle;
start = ( -unitQuat0 );
} else {
start = unitQuat0;
}
if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
angle = acosf( cosAngle );
recipSinAngle = ( 1.0f / sinf( angle ) );
scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
} else {
scale0 = ( 1.0f - t );
scale1 = t;
}
return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
Quat_ref start;
float recipSinAngle, scale0, scale1, cosAngle, angle;
cosAngle = dot(unitQuat0, unitQuat1);
if (cosAngle < 0.0f)
{
cosAngle = -cosAngle;
start = (-unitQuat0);
}
else
{
start = unitQuat0;
}
if (cosAngle < _VECTORMATH_SLERP_TOL)
{
angle = acosf(cosAngle);
recipSinAngle = (1.0f / sinf(angle));
scale0 = (sinf(((1.0f - t) * angle)) * recipSinAngle);
scale1 = (sinf((t * angle)) * recipSinAngle);
}
else
{
scale0 = (1.0f - t);
scale1 = t;
}
return ((start * scale0) + (unitQuat1 * scale1));
}
inline const Quat_ref squad( float t, const Quat_ref & unitQuat0, const Quat_ref & unitQuat1, const Quat_ref & unitQuat2, const Quat_ref & unitQuat3 )
inline const Quat_ref squad(float t, const Quat_ref &unitQuat0, const Quat_ref &unitQuat1, const Quat_ref &unitQuat2, const Quat_ref &unitQuat3)
{
Quat_ref tmp0, tmp1;
tmp0 = slerp( t, unitQuat0, unitQuat3 );
tmp1 = slerp( t, unitQuat1, unitQuat2 );
return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 );
Quat_ref tmp0, tmp1;
tmp0 = slerp(t, unitQuat0, unitQuat3);
tmp1 = slerp(t, unitQuat1, unitQuat2);
return slerp(((2.0f * t) * (1.0f - t)), tmp0, tmp1);
}
inline float norm( const Quat_ref & quat )
inline float norm(const Quat_ref &quat)
{
float result;
result = ( quat.getX() * quat.getX() );
result = ( result + ( quat.getY() * quat.getY() ) );
result = ( result + ( quat.getZ() * quat.getZ() ) );
result = ( result + ( quat.getW() * quat.getW() ) );
return result;
float result;
result = (quat.getX() * quat.getX());
result = (result + (quat.getY() * quat.getY()));
result = (result + (quat.getZ() * quat.getZ()));
result = (result + (quat.getW() * quat.getW()));
return result;
}
inline float length( const Quat_ref & quat )
inline float length(const Quat_ref &quat)
{
return ::sqrtf( norm( quat ) );
return ::sqrtf(norm(quat));
}
inline const Quat_ref normalize( const Quat_ref & quat )
inline const Quat_ref normalize(const Quat_ref &quat)
{
float lenSqr, lenInv;
lenSqr = norm( quat );
lenInv = ( 1.0f / sqrtf( lenSqr ) );
return Quat_ref(
( quat.getX() * lenInv ),
( quat.getY() * lenInv ),
( quat.getZ() * lenInv ),
( quat.getW() * lenInv )
);
float lenSqr, lenInv;
lenSqr = norm(quat);
lenInv = (1.0f / sqrtf(lenSqr));
return Quat_ref(
(quat.getX() * lenInv),
(quat.getY() * lenInv),
(quat.getZ() * lenInv),
(quat.getW() * lenInv));
}
inline const Quat_ref Quat_ref::rotation( const vmVector3 & unitVec0, const vmVector3 & unitVec1 )
inline const Quat_ref Quat_ref::rotation(const vmVector3 &unitVec0, const vmVector3 &unitVec1)
{
float cosHalfAngleX2, recipCosHalfAngleX2;
cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) );
recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 );
return Quat_ref( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), ( cosHalfAngleX2 * 0.5f ) );
float cosHalfAngleX2, recipCosHalfAngleX2;
cosHalfAngleX2 = sqrtf((2.0f * (1.0f + dot(unitVec0, unitVec1))));
recipCosHalfAngleX2 = (1.0f / cosHalfAngleX2);
return Quat_ref((cross(unitVec0, unitVec1) * recipCosHalfAngleX2), (cosHalfAngleX2 * 0.5f));
}
inline const Quat_ref Quat_ref::rotation( float radians, const vmVector3 & unitVec )
inline const Quat_ref Quat_ref::rotation(float radians, const vmVector3 &unitVec)
{
float s, c, angle;
angle = ( radians * 0.5f );
s = sinf( angle );
c = cosf( angle );
return Quat_ref( ( unitVec * s ), c );
float s, c, angle;
angle = (radians * 0.5f);
s = sinf(angle);
c = cosf(angle);
return Quat_ref((unitVec * s), c);
}
inline const Quat_ref Quat_ref::rotationX( float radians )
inline const Quat_ref Quat_ref::rotationX(float radians)
{
float s, c, angle;
angle = ( radians * 0.5f );
s = sinf( angle );
c = cosf( angle );
return Quat_ref( s, 0.0f, 0.0f, c );
float s, c, angle;
angle = (radians * 0.5f);
s = sinf(angle);
c = cosf(angle);
return Quat_ref(s, 0.0f, 0.0f, c);
}
inline const Quat_ref Quat_ref::rotationY( float radians )
inline const Quat_ref Quat_ref::rotationY(float radians)
{
float s, c, angle;
angle = ( radians * 0.5f );
s = sinf( angle );
c = cosf( angle );
return Quat_ref( 0.0f, s, 0.0f, c );
float s, c, angle;
angle = (radians * 0.5f);
s = sinf(angle);
c = cosf(angle);
return Quat_ref(0.0f, s, 0.0f, c);
}
inline const Quat_ref Quat_ref::rotationZ( float radians )
inline const Quat_ref Quat_ref::rotationZ(float radians)
{
float s, c, angle;
angle = ( radians * 0.5f );
s = sinf( angle );
c = cosf( angle );
return Quat_ref( 0.0f, 0.0f, s, c );
float s, c, angle;
angle = (radians * 0.5f);
s = sinf(angle);
c = cosf(angle);
return Quat_ref(0.0f, 0.0f, s, c);
}
inline const Quat_ref Quat_ref::operator *( const Quat_ref & quat ) const
inline const Quat_ref Quat_ref::operator*(const Quat_ref &quat) const
{
return Quat_ref(
( ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY ) ),
( ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ ) ),
( ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX ) ),
( ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ ) )
);
return Quat_ref(
((((mW * quat.mX) + (mX * quat.mW)) + (mY * quat.mZ)) - (mZ * quat.mY)),
((((mW * quat.mY) + (mY * quat.mW)) + (mZ * quat.mX)) - (mX * quat.mZ)),
((((mW * quat.mZ) + (mZ * quat.mW)) + (mX * quat.mY)) - (mY * quat.mX)),
((((mW * quat.mW) - (mX * quat.mX)) - (mY * quat.mY)) - (mZ * quat.mZ)));
}
inline Quat_ref & Quat_ref::operator *=( const Quat_ref & quat )
inline Quat_ref &Quat_ref::operator*=(const Quat_ref &quat)
{
*this = *this * quat;
return *this;
*this = *this * quat;
return *this;
}
inline const vmVector3 rotate( const Quat_ref & quat, const vmVector3 & vec )
inline const vmVector3 rotate(const Quat_ref &quat, const vmVector3 &vec)
{
float tmpX, tmpY, tmpZ, tmpW;
tmpX = ( ( ( quat.getW() * vec.getX() ) + ( quat.getY() * vec.getZ() ) ) - ( quat.getZ() * vec.getY() ) );
tmpY = ( ( ( quat.getW() * vec.getY() ) + ( quat.getZ() * vec.getX() ) ) - ( quat.getX() * vec.getZ() ) );
tmpZ = ( ( ( quat.getW() * vec.getZ() ) + ( quat.getX() * vec.getY() ) ) - ( quat.getY() * vec.getX() ) );
tmpW = ( ( ( quat.getX() * vec.getX() ) + ( quat.getY() * vec.getY() ) ) + ( quat.getZ() * vec.getZ() ) );
return vmVector3(
( ( ( ( tmpW * quat.getX() ) + ( tmpX * quat.getW() ) ) - ( tmpY * quat.getZ() ) ) + ( tmpZ * quat.getY() ) ),
( ( ( ( tmpW * quat.getY() ) + ( tmpY * quat.getW() ) ) - ( tmpZ * quat.getX() ) ) + ( tmpX * quat.getZ() ) ),
( ( ( ( tmpW * quat.getZ() ) + ( tmpZ * quat.getW() ) ) - ( tmpX * quat.getY() ) ) + ( tmpY * quat.getX() ) )
);
float tmpX, tmpY, tmpZ, tmpW;
tmpX = (((quat.getW() * vec.getX()) + (quat.getY() * vec.getZ())) - (quat.getZ() * vec.getY()));
tmpY = (((quat.getW() * vec.getY()) + (quat.getZ() * vec.getX())) - (quat.getX() * vec.getZ()));
tmpZ = (((quat.getW() * vec.getZ()) + (quat.getX() * vec.getY())) - (quat.getY() * vec.getX()));
tmpW = (((quat.getX() * vec.getX()) + (quat.getY() * vec.getY())) + (quat.getZ() * vec.getZ()));
return vmVector3(
((((tmpW * quat.getX()) + (tmpX * quat.getW())) - (tmpY * quat.getZ())) + (tmpZ * quat.getY())),
((((tmpW * quat.getY()) + (tmpY * quat.getW())) - (tmpZ * quat.getX())) + (tmpX * quat.getZ())),
((((tmpW * quat.getZ()) + (tmpZ * quat.getW())) - (tmpX * quat.getY())) + (tmpY * quat.getX())));
}
inline const Quat_ref conj( const Quat_ref & quat )
inline const Quat_ref conj(const Quat_ref &quat)
{
return Quat_ref( -quat.getX(), -quat.getY(), -quat.getZ(), quat.getW() );
return Quat_ref(-quat.getX(), -quat.getY(), -quat.getZ(), quat.getW());
}
inline const Quat_ref select( const Quat_ref & quat0, const Quat_ref & quat1, bool select1 )
inline const Quat_ref select(const Quat_ref &quat0, const Quat_ref &quat1, bool select1)
{
return Quat_ref(
( select1 )? quat1.getX() : quat0.getX(),
( select1 )? quat1.getY() : quat0.getY(),
( select1 )? quat1.getZ() : quat0.getZ(),
( select1 )? quat1.getW() : quat0.getW()
);
return Quat_ref(
(select1) ? quat1.getX() : quat0.getX(),
(select1) ? quat1.getY() : quat0.getY(),
(select1) ? quat1.getZ() : quat0.getZ(),
(select1) ? quat1.getW() : quat0.getW());
}
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
#define DATA_SIZE 1024
int Test_quat_aos_neon(void)
{
return 0;
return 0;
}
#endif

View File

@@ -9,11 +9,12 @@
#define BulletTest_Test_quat_aos_neon_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_quat_aos_neon(void);
int Test_quat_aos_neon(void);
#ifdef __cplusplus
}
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_v3cross.h"
#include "vector.h"
@@ -27,155 +25,156 @@ static btVector3& v3cross_ref(btVector3& v1, btVector3& v2);
int Test_v3cross(void)
{
btVector3 v1, v2, v3;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
btVector3 v1, v2, v3;
float x, y, z, w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x, y, z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x, y, z);
v2.setW(w);
v3 = v1;
btVector3 correct_res, test_res;
{
btVector3 correct_res, test_res;
{
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3cross_ref(v1, v2);
test_res = v3.cross(v2);
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3cross result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
if (fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) >
FLT_EPSILON * 4)
{
vlog(
"Error - v3cross result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x, y, z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
vec3_arr1[k] = v3cross_ref(vec3_arr1[k], vec3_arr2[k]);
vec3_arr1[k] = v3cross_ref(vec3_arr1[k], vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x, y, z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
vec3_arr1[k] = vec3_arr1[k].cross(vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
static btVector3& v3cross_ref(btVector3& v1, btVector3& v2)
{
btScalar x,y,z;
btScalar x, y, z;
x = v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1];
y = v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2];
z = v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0];
v1.m_floats[0] = x;
v1.m_floats[1] = y;
v1.m_floats[2] = z;
return v1;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_v3cross_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_v3cross(void);
int Test_v3cross(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_v3div.h"
#include "vector.h"
@@ -20,16 +17,16 @@
#include <LinearMath/btVector3.h>
#define BT_OP(a, b) ((a) / (b))
#define BT_OP(a, b) ((a) / (b))
// reference code for testing purposes
static inline btVector3& v3div_ref(btVector3& v1, btVector3& v2);
static btVector3& v3div_ref(btVector3& v0, btVector3& v1, btVector3& v2)
{
v0.m_floats[0] = BT_OP(v1.m_floats[0] , v2.m_floats[0]),
v0.m_floats[1] = BT_OP(v1.m_floats[1] , v2.m_floats[1]),
v0.m_floats[2] = BT_OP(v1.m_floats[2] , v2.m_floats[2]);
v0.m_floats[0] = BT_OP(v1.m_floats[0], v2.m_floats[0]),
v0.m_floats[1] = BT_OP(v1.m_floats[1], v2.m_floats[1]),
v0.m_floats[2] = BT_OP(v1.m_floats[2], v2.m_floats[2]);
return v0;
}
@@ -38,141 +35,143 @@ static btVector3& v3div_ref(btVector3& v0, btVector3& v1, btVector3& v2)
int Test_v3div(void)
{
btVector3 v1, v2, v3;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
btVector3 v1, v2, v3;
float x, y, z, w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x, y, z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x, y, z);
v2.setW(w);
v3 = v1;
btVector3 correct_res, test_res;
{
btVector3 correct_res, test_res;
{
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3div_ref(correct_res, v1, v2);
test_res = BT_OP(v3,v2);
if( fabsf(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabsf(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabsf(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON*10 )
{
vlog( "Error - v3div result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
test_res = BT_OP(v3, v2);
if (fabsf(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabsf(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabsf(correct_res.m_floats[2] - test_res.m_floats[2]) >
FLT_EPSILON * 10)
{
vlog(
"Error - v3div result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr0[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
w = BT_NAN; // w channel NaN
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
w = BT_NAN; // w channel NaN
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x, y, z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
vec3_arr0[k] = v3div_ref(vec3_arr0[k], vec3_arr1[k], vec3_arr2[k]);
vec3_arr0[k] = v3div_ref(vec3_arr0[k], vec3_arr1[k], vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x, y, z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
vec3_arr0[k] = BT_OP(vec3_arr1[k] , vec3_arr2[k]);
vec3_arr0[k] = BT_OP(vec3_arr1[k], vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_v3div_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_v3div(void);
int Test_v3div(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_v3dot.h"
#include "vector.h"
@@ -20,9 +18,8 @@
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btScalar v3dot_ref(
const btVector3& v1,
static inline btScalar v3dot_ref(
const btVector3& v1,
const btVector3& v2);
#define LOOPCOUNT 1000
@@ -30,135 +27,141 @@ btScalar v3dot_ref(
int Test_v3dot(void)
{
btVector3 v1, v2;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
btVector3 v1, v2;
float x, y, z, w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x, y, z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x, y, z);
v2.setW(w);
float correctDot0, testDot0;
{
float correctDot0, testDot0;
{
correctDot0 = w;
testDot0 = w; ;
testDot0 = w;
;
correctDot0 = v3dot_ref(v1, v2);
testDot0 = v1.dot(v2);
if( fabsf(correctDot0 - testDot0) > FLT_EPSILON * 4 )
if (fabsf(correctDot0 - testDot0) > FLT_EPSILON * 4)
{
vlog( "Error - v3dot result error! %f != %f \n", correctDot0, testDot0);
vlog("Error - v3dot result error! %f != %f \n", correctDot0, testDot0);
return 1;
}
}
#define DATA_SIZE 1024
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
btScalar res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
btScalar res_arr[DATA_SIZE];
for( k = 0; k < DATA_SIZE; k++ )
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x, y, z);
vec3_arr2[k].setW(w);
res_arr[k] = w;
}
res_arr[k] = w;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t k32 = (k & (DATA_SIZE-1));
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]); k32++;
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]); k32++;
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]); k32++;
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]);
size_t k32 = (k & (DATA_SIZE - 1));
res_arr[k32] = v3dot_ref(vec3_arr1[k32], vec3_arr2[k32]);
k32++;
res_arr[k32] = v3dot_ref(vec3_arr1[k32], vec3_arr2[k32]);
k32++;
res_arr[k32] = v3dot_ref(vec3_arr1[k32], vec3_arr2[k32]);
k32++;
res_arr[k32] = v3dot_ref(vec3_arr1[k32], vec3_arr2[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t k32 = k & (DATA_SIZE -1);
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]); k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]); k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]); k32++;
size_t k32 = k & (DATA_SIZE - 1);
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]);
k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]);
k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]);
k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT, TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
static btScalar v3dot_ref(const btVector3& v1,
const btVector3& v2)
static btScalar v3dot_ref(const btVector3& v1,
const btVector3& v2)
{
return (v1.m_floats[0] * v2.m_floats[0] +
v1.m_floats[1] * v2.m_floats[1] +
v1.m_floats[2] * v2.m_floats[2]);
return (v1.m_floats[0] * v2.m_floats[0] +
v1.m_floats[1] * v2.m_floats[1] +
v1.m_floats[2] * v2.m_floats[2]);
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_v3dot_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_v3dot(void);
int Test_v3dot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_v3interp.h"
#include "vector.h"
@@ -21,177 +18,178 @@
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3& v3interp_ref(
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt);
static inline btVector3& v3interp_ref(
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt);
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
int Test_v3interp(void)
{
btVector3 v1, v2;
btVector3 v1, v2;
btScalar rt;
float x,y,z,w;
float x, y, z, w;
float vNaN = BT_NAN;
w = BT_NAN; // w channel NaN
btVector3 correct_res, test_res;
w = BT_NAN; // w channel NaN
for (rt = 0.0f; rt <= 1.0f; rt += 0.1f)
{
correct_res.setValue(vNaN, vNaN, vNaN);
btVector3 correct_res, test_res;
for (rt = 0.0f; rt <= 1.0f; rt += 0.1f)
{
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v1.setValue(x,y,z);
v1.setW(w);
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v1.setValue(x, y, z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
v2.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x, y, z);
v2.setW(w);
correct_res = v3interp_ref(correct_res, v1, v2, rt);
//test self-referencing vector, see issue https://github.com/bulletphysics/bullet3/pull/313
correct_res = v3interp_ref(correct_res, v1, v2, rt);
//test self-referencing vector, see issue https://github.com/bulletphysics/bullet3/pull/313
test_res = v1;
test_res.setInterpolate3(test_res, v2, rt);
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3interp result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n"
"\n rt=%10.4f",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2], rt);
if (fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) >
FLT_EPSILON * 4)
{
vlog(
"Error - v3interp result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n"
"\n rt=%10.4f",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2], rt);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
btScalar rt_arr[DATA_SIZE];
btScalar rt_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x, y, z);
vec3_arr2[k].setW(w);
rt_arr[k] = RANDF_01;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
rt_arr[k] = RANDF_01;
}
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
v3interp_ref(vec3_arr1[k], vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
v3interp_ref(vec3_arr1[k], vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x, y, z);
vec3_arr2[k].setW(w);
rt_arr[k] = RANDF_01;
rt_arr[k] = RANDF_01;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
vec3_arr1[k].setInterpolate3(vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
static btVector3&
static btVector3&
v3interp_ref(
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt)
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt)
{
btScalar s = btScalar(1.0) - rt;
vr.m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
vr.m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
vr.m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
btScalar s = btScalar(1.0) - rt;
vr.m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
vr.m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
vr.m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
return vr;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_v3interp_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_v3interp(void);
int Test_v3interp(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_v3lerp.h"
#include "vector.h"
@@ -21,178 +18,177 @@
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3&
static inline btVector3&
v3lerp_ref(
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt);
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt);
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
int Test_v3lerp(void)
{
btVector3 v1, v2;
btVector3 v1, v2;
btScalar rt;
float x,y,z,w;
float vNaN =BT_NAN;
w =BT_NAN; // w channel NaN
btVector3 correct_res, test_res;
float x, y, z, w;
for (rt = 0.0f; rt <= 1.0f; rt += 0.1f)
{
correct_res.setValue(vNaN, vNaN, vNaN);
float vNaN = BT_NAN;
w = BT_NAN; // w channel NaN
btVector3 correct_res, test_res;
for (rt = 0.0f; rt <= 1.0f; rt += 0.1f)
{
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v1.setValue(x,y,z);
v1.setW(w);
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v1.setValue(x, y, z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
v2.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x, y, z);
v2.setW(w);
correct_res = v3lerp_ref(correct_res, v1, v2, rt);
correct_res = v3lerp_ref(correct_res, v1, v2, rt);
test_res = v1.lerp(v2, rt);
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3lerp result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n"
"\n rt=%10.4f",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2], rt);
if (fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) >
FLT_EPSILON * 4)
{
vlog(
"Error - v3lerp result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n"
"\n rt=%10.4f",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2], rt);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
btScalar rt_arr[DATA_SIZE];
btScalar rt_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
w =BT_NAN; // w channel NaN
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
w = BT_NAN; // w channel NaN
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x, y, z);
vec3_arr2[k].setW(w);
rt_arr[k] = RANDF_01;
rt_arr[k] = RANDF_01;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
v3lerp_ref(vec3_arr1[k], vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
v3lerp_ref(vec3_arr1[k], vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x, y, z);
vec3_arr2[k].setW(w);
rt_arr[k] = RANDF_01;
rt_arr[k] = RANDF_01;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
vec3_arr1[k] = vec3_arr1[k].lerp(vec3_arr2[k], rt_arr[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
static
btVector3&
static btVector3&
v3lerp_ref(
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt)
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt)
{
vr.m_floats[0] = v0.m_floats[0] + rt * (v1.m_floats[0] - v0.m_floats[0]);
vr.m_floats[1] = v0.m_floats[1] + rt * (v1.m_floats[1] - v0.m_floats[1]);
vr.m_floats[2] = v0.m_floats[2] + rt * (v1.m_floats[2] - v0.m_floats[2]);
vr.m_floats[0] = v0.m_floats[0] + rt * (v1.m_floats[0] - v0.m_floats[0]);
vr.m_floats[1] = v0.m_floats[1] + rt * (v1.m_floats[1] - v0.m_floats[1]);
vr.m_floats[2] = v0.m_floats[2] + rt * (v1.m_floats[2] - v0.m_floats[2]);
return vr;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_v3lerp_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_v3lerp(void);
int Test_v3lerp(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_v3norm.h"
#include "vector.h"
@@ -28,143 +25,144 @@ static inline btVector3& v3norm_ref(btVector3& v);
int Test_v3norm(void)
{
btVector3 v1, v2;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
btVector3 v1, v2;
float x, y, z, w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x, y, z);
v1.setW(w);
v2 = v1;
v2 = v1;
btVector3 correct_res, test_res;
{
btVector3 correct_res, test_res;
{
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3norm_ref(v1);
test_res = v2.normalize();
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3norm result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
if (fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) >
FLT_EPSILON * 4)
{
vlog(
"Error - v3norm result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr0[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
vec3_arr0[k] = v3norm_ref(vec3_arr1[k]);
vec3_arr0[k+1] = v3norm_ref(vec3_arr1[k+1]);
vec3_arr0[k+2] = v3norm_ref(vec3_arr1[k+2]);
vec3_arr0[k+3] = v3norm_ref(vec3_arr1[k+3]);
vec3_arr0[k] = v3norm_ref(vec3_arr1[k]);
vec3_arr0[k + 1] = v3norm_ref(vec3_arr1[k + 1]);
vec3_arr0[k + 2] = v3norm_ref(vec3_arr1[k + 2]);
vec3_arr0[k + 3] = v3norm_ref(vec3_arr1[k + 3]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
vec3_arr0[k] = vec3_arr1[k].normalize();
vec3_arr0[k+1] = vec3_arr1[k+1].normalize();
vec3_arr0[k+2] = vec3_arr1[k+2].normalize();
vec3_arr0[k+3] = vec3_arr1[k+3].normalize();
vec3_arr0[k + 1] = vec3_arr1[k + 1].normalize();
vec3_arr0[k + 2] = vec3_arr1[k + 2].normalize();
vec3_arr0[k + 3] = vec3_arr1[k + 3].normalize();
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
static btVector3& v3norm_ref(btVector3& v)
{
float dot = v.m_floats[0] * v.m_floats[0] +
float dot = v.m_floats[0] * v.m_floats[0] +
v.m_floats[1] * v.m_floats[1] +
v.m_floats[2] * v.m_floats[2];
dot = 1.0f / sqrtf(dot);
v.m_floats[0] *= dot;
v.m_floats[0] *= dot;
v.m_floats[1] *= dot;
v.m_floats[2] *= dot;
return v;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_v3norm_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_v3norm(void);
int Test_v3norm(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_v3rotate.h"
#include "vector.h"
@@ -20,10 +18,9 @@
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3& v3rotate_ref(
btVector3& v0,
btVector3& v1,
static inline btVector3& v3rotate_ref(
btVector3& v0,
btVector3& v1,
const btScalar& s);
#define LOOPCOUNT 2048
@@ -31,164 +28,165 @@ btVector3& v3rotate_ref(
int Test_v3rotate(void)
{
btVector3 v1, v2;
btVector3 v1, v2;
float s;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
float x, y, z, w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x, y, z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x, y, z);
v2.setW(w);
s = RANDF_01 * (float) SIMD_PI;
btVector3 correct_res, test_res;
{
s = RANDF_01 * (float)SIMD_PI;
btVector3 correct_res, test_res;
{
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
test_res = v1.rotate(v2, s);
correct_res = v3rotate_ref(v1, v2, s);
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3rotate result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
correct_res = v3rotate_ref(v1, v2, s);
if (fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) >
FLT_EPSILON * 4)
{
vlog(
"Error - v3rotate result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr0[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
btScalar s_arr[DATA_SIZE];
btScalar s_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr0[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr0[k].setValue(x, y, z);
vec3_arr0[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
s_arr[k] = RANDF_01 * (float)SIMD_PI;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
vec3_arr0[k] = v3rotate_ref(vec3_arr0[k], vec3_arr1[k], s_arr[k]);
vec3_arr0[k] = v3rotate_ref(vec3_arr0[k], vec3_arr1[k], s_arr[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr0[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr0[k].setValue(x, y, z);
vec3_arr0[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x, y, z);
vec3_arr1[k].setW(w);
s_arr[k] = RANDF_01 * (float)SIMD_PI;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
vec3_arr0[k ] = vec3_arr0[k ].rotate(vec3_arr1[k ], s_arr[k]);
vec3_arr0[k] = vec3_arr0[k].rotate(vec3_arr1[k], s_arr[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
static inline
btVector3&
static inline btVector3&
v3rotate_ref(
btVector3& v0,
btVector3& wAxis,
const btScalar& _angle)
btVector3& v0,
btVector3& wAxis,
const btScalar& _angle)
{
btVector3 o = wAxis * wAxis.dot( v0 );
btVector3 o = wAxis * wAxis.dot(v0);
btVector3 _x = v0 - o;
btVector3 _y;
_y = wAxis.cross( v0 );
_y = wAxis.cross(v0);
v0 = o + _x * cosf( _angle ) + _y * sinf( _angle );
v0 = o + _x * cosf(_angle) + _y * sinf(_angle);
return v0;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_v3rotate_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_v3rotate(void);
int Test_v3rotate(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,11 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_v3sdiv.h"
#include "vector.h"
@@ -21,9 +18,8 @@
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3& v3sdiv_ref(
btVector3& v,
static inline btVector3& v3sdiv_ref(
btVector3& v,
const btScalar& s);
#define LOOPCOUNT 2048
@@ -31,151 +27,152 @@ btVector3& v3sdiv_ref(
int Test_v3sdiv(void)
{
btVector3 v1, v2;
btVector3 v1, v2;
btScalar s;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
float x, y, z, w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x, y, z);
v1.setW(w);
v2.setValue(x,y,z);
v2.setValue(x, y, z);
v2.setW(w);
s = (float) RANDF_16;
btVector3 correct_res, test_res;
{
s = (float)RANDF_16;
btVector3 correct_res, test_res;
{
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3sdiv_ref(v1, s);
test_res = (v2 /= s);
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3sdiv result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
if (fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) >
FLT_EPSILON * 4)
{
vlog(
"Error - v3sdiv result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr[DATA_SIZE];
btScalar s_arr[DATA_SIZE];
btScalar s_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
bestTime = uint64_t(-1LL);
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = uint64_t(-1LL);
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr[k].setValue(x, y, z);
vec3_arr[k].setW(w);
s_arr[k] = RANDF_01;
}
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
v3sdiv_ref( vec3_arr[k], s_arr[k]);
v3sdiv_ref( vec3_arr[k+1], s_arr[k+1]);
v3sdiv_ref( vec3_arr[k+2], s_arr[k+2]);
v3sdiv_ref( vec3_arr[k+3], s_arr[k+3]);
v3sdiv_ref(vec3_arr[k], s_arr[k]);
v3sdiv_ref(vec3_arr[k + 1], s_arr[k + 1]);
v3sdiv_ref(vec3_arr[k + 2], s_arr[k + 2]);
v3sdiv_ref(vec3_arr[k + 3], s_arr[k + 3]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr[k].setValue(x, y, z);
vec3_arr[k].setW(w);
s_arr[k] = RANDF_01;
}
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
vec3_arr[k] /= s_arr[k];
vec3_arr[k+1] /= s_arr[k+1];
vec3_arr[k+2] /= s_arr[k+2];
vec3_arr[k+3] /= s_arr[k+3];
vec3_arr[k + 1] /= s_arr[k + 1];
vec3_arr[k + 2] /= s_arr[k + 2];
vec3_arr[k + 3] /= s_arr[k + 3];
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
static inline
btVector3&
static inline btVector3&
v3sdiv_ref(
btVector3& v,
const btScalar& s)
btVector3& v,
const btScalar& s)
{
btScalar recip = btScalar(1.0) / s;
v.m_floats[0] *= recip;
v.m_floats[0] *= recip;
v.m_floats[1] *= recip;
v.m_floats[2] *= recip;
return v;
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_v3sdiv_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_v3sdiv(void);
int Test_v3sdiv(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_v3skew.h"
#include "vector.h"
@@ -20,51 +18,52 @@
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static void
static void
v3skew_ref(
const btVector3* v,
const btVector3* v,
btVector3* v1,
btVector3* v2,
btVector3* v3);
btVector3* v3);
#define LOOPCOUNT 2048
#define NUM_CYCLES 10000
int Test_v3skew(void)
{
btVector3 v, v1, v2, v3, vt1, vt2, vt3;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v.setValue(x,y,z);
btVector3 v, v1, v2, v3, vt1, vt2, vt3;
float x, y, z, w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v.setValue(x, y, z);
v.setW(w);
v1.setValue(w,w,w);
v1.setValue(w, w, w);
v1.setW(w);
vt3 = vt2 = vt1 = v3 = v2 = v1;
{
vt3 = vt2 = vt1 = v3 = v2 = v1;
{
v3skew_ref(&v, &v1, &v2, &v3);
v.getSkewSymmetricMatrix(&vt1, &vt2, &vt3);
/*
/*
if( v1.m_floats[0] != vt1.m_floats[0] ||
v1.m_floats[1] != vt1.m_floats[1] ||
v1.m_floats[2] != vt1.m_floats[2] )
*/
if(!(v1 == vt1))
{
vlog( "Error - v3skew result error! "
"\ncorrect v1 = (%10.4f, %10.4f, %10.4f) "
"\ntested v1 = (%10.4f, %10.4f, %10.4f) \n",
v1.m_floats[0], v1.m_floats[1], v1.m_floats[2],
vt1.m_floats[0], vt1.m_floats[1], vt1.m_floats[2]);
if (!(v1 == vt1))
{
vlog(
"Error - v3skew result error! "
"\ncorrect v1 = (%10.4f, %10.4f, %10.4f) "
"\ntested v1 = (%10.4f, %10.4f, %10.4f) \n",
v1.m_floats[0], v1.m_floats[1], v1.m_floats[2],
vt1.m_floats[0], vt1.m_floats[1], vt1.m_floats[2]);
return 1;
}
@@ -73,14 +72,15 @@ int Test_v3skew(void)
v2.m_floats[1] != vt2.m_floats[1] ||
v2.m_floats[2] != vt2.m_floats[2] )
*/
if(!(v2 == vt2))
{
vlog( "Error - v3skew result error! "
"\ncorrect v2 = (%10.4f, %10.4f, %10.4f) "
"\ntested v2 = (%10.4f, %10.4f, %10.4f) \n",
v2.m_floats[0], v2.m_floats[1], v2.m_floats[2],
vt2.m_floats[0], vt2.m_floats[1], vt2.m_floats[2]);
if (!(v2 == vt2))
{
vlog(
"Error - v3skew result error! "
"\ncorrect v2 = (%10.4f, %10.4f, %10.4f) "
"\ntested v2 = (%10.4f, %10.4f, %10.4f) \n",
v2.m_floats[0], v2.m_floats[1], v2.m_floats[2],
vt2.m_floats[0], vt2.m_floats[1], vt2.m_floats[2]);
return 1;
}
@@ -89,18 +89,19 @@ int Test_v3skew(void)
v3.m_floats[1] != vt3.m_floats[1] ||
v3.m_floats[2] != vt3.m_floats[2] )
*/
if(!(v3 == vt3))
{
vlog( "Error - v3skew result error! "
"\ncorrect v3 = (%10.4f, %10.4f, %10.4f) "
"\ntested v3 = (%10.4f, %10.4f, %10.4f) \n",
v3.m_floats[0], v3.m_floats[1], v3.m_floats[2],
vt3.m_floats[0], vt3.m_floats[1], vt3.m_floats[2]);
if (!(v3 == vt3))
{
vlog(
"Error - v3skew result error! "
"\ncorrect v3 = (%10.4f, %10.4f, %10.4f) "
"\ntested v3 = (%10.4f, %10.4f, %10.4f) \n",
v3.m_floats[0], v3.m_floats[1], v3.m_floats[2],
vt3.m_floats[0], vt3.m_floats[1], vt3.m_floats[2]);
return 1;
}
}
#define DATA_SIZE 256
btVector3 v3_arr0[DATA_SIZE];
@@ -108,90 +109,89 @@ int Test_v3skew(void)
btVector3 v3_arr2[DATA_SIZE];
btVector3 v3_arr3[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3_arr0[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3_arr0[k].setValue(x, y, z);
v3_arr0[k].setW(w);
v3_arr1[k].setValue(w,w,w);
v3_arr1[k].setValue(w, w, w);
v3_arr1[k].setW(w);
v3_arr3[k] = v3_arr2[k] = v3_arr1[k];
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
size_t k32 = (k & (DATA_SIZE-1));
v3skew_ref( &v3_arr0[k32], &v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]);
size_t k32 = (k & (DATA_SIZE - 1));
v3skew_ref(&v3_arr0[k32], &v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
{
size_t k32 = (k & (DATA_SIZE -1));
v3_arr0[k32].getSkewSymmetricMatrix(&v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]);
size_t k32 = (k & (DATA_SIZE - 1));
v3_arr0[k32].getSkewSymmetricMatrix(&v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT, TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
static void
static void
v3skew_ref(
const btVector3* v,
btVector3* v1,
btVector3* v2,
const btVector3* v,
btVector3* v1,
btVector3* v2,
btVector3* v3)
{
v1->setValue(0. ,-v->z(),v->y());
v2->setValue(v->z() ,0. ,-v->x());
v3->setValue(-v->y(),v->x() ,0.);
v1->setValue(0., -v->z(), v->y());
v2->setValue(v->z(), 0., -v->x());
v3->setValue(-v->y(), v->x(), 0.);
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_v3skew_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_v3skew(void);
int Test_v3skew(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -5,10 +5,8 @@
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_v3triple.h"
#include "vector.h"
@@ -20,9 +18,9 @@
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static btScalar
static btScalar
v3triple_ref(
const btVector3& v,
const btVector3& v,
const btVector3& v1,
const btVector3& v2);
@@ -31,150 +29,154 @@ v3triple_ref(
int Test_v3triple(void)
{
btVector3 v1, v2, v3;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
btVector3 v1, v2, v3;
float x, y, z, w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x, y, z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x, y, z);
v2.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3.setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3.setValue(x, y, z);
v3.setW(w);
float correctTriple0, testTriple0;
{
float correctTriple0, testTriple0;
{
correctTriple0 = w;
testTriple0 = w;
testTriple0 = v3triple_ref(v1,v2,v3);
testTriple0 = v3triple_ref(v1, v2, v3);
correctTriple0 = v1.triple(v2, v3);
if( fabsf(correctTriple0 - testTriple0) > FLT_EPSILON * 4 )
if (fabsf(correctTriple0 - testTriple0) > FLT_EPSILON * 4)
{
vlog( "Error - v3triple result error! %f != %f \n", correctTriple0, testTriple0);
vlog("Error - v3triple result error! %f != %f \n", correctTriple0, testTriple0);
return 1;
}
}
#define DATA_SIZE 1024
btVector3 v3_arr1[DATA_SIZE];
btVector3 v3_arr2[DATA_SIZE];
btVector3 v3_arr3[DATA_SIZE];
btScalar res_arr[DATA_SIZE];
btScalar res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
for( k = 0; k < DATA_SIZE; k++ )
for (k = 0; k < DATA_SIZE; k++)
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3_arr1[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3_arr1[k].setValue(x, y, z);
v3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3_arr2[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3_arr2[k].setValue(x, y, z);
v3_arr2[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3_arr3[k].setValue(x,y,z);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3_arr3[k].setValue(x, y, z);
v3_arr3[k].setW(w);
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = uint64_t(-1LL);
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
uint64_t startTime, bestTime, currentTime;
bestTime = uint64_t(-1LL);
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t k32 = (k & (DATA_SIZE-1));
res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]);
size_t k32 = (k & (DATA_SIZE - 1));
res_arr[k32] = v3triple_ref(v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]);
k32++;
res_arr[k32] = v3triple_ref(v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]);
k32++;
res_arr[k32] = v3triple_ref(v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]);
k32++;
res_arr[k32] = v3triple_ref(v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = uint64_t(-1LL);
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = uint64_t(-1LL);
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t k32 = k & (DATA_SIZE -1);
res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]);
size_t k32 = k & (DATA_SIZE - 1);
res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]);
k32++;
res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]);
k32++;
res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]);
k32++;
res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT, TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
return 0;
}
static btScalar
static btScalar
v3triple_ref(
const btVector3& v,
const btVector3& v1,
const btVector3& v,
const btVector3& v1,
const btVector3& v2)
{
return
v.m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
v.m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
v.m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
return v.m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
v.m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
v.m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
}
#endif //BT_USE_SSE
#endif //BT_USE_SSE

View File

@@ -9,14 +9,14 @@
#define BulletTest_Test_v3triple_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int Test_v3triple(void);
int Test_v3triple(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -14,7 +14,7 @@
#else
#include "LinearMath/btAlignedAllocator.h"
#endif //__APPLE__
#endif //__APPLE__
#include <stdlib.h>
@@ -26,247 +26,240 @@ int gReportNanoseconds = 0;
#ifdef _WIN32
#include <intrin.h>
uint64_t ReadTicks( void )
uint64_t ReadTicks(void)
{
return __rdtsc();
return __rdtsc();
}
double TicksToCycles( uint64_t delta )
double TicksToCycles(uint64_t delta)
{
return double(delta);
}
double TicksToSeconds( uint64_t delta )
double TicksToSeconds(uint64_t delta)
{
return double(delta);
}
void *GuardCalloc( size_t count, size_t size, size_t *objectStride )
void *GuardCalloc(size_t count, size_t size, size_t *objectStride)
{
if (objectStride)
*objectStride = size;
return (void*) btAlignedAlloc(count * size,16);
return (void *)btAlignedAlloc(count * size, 16);
}
void GuardFree( void *buf )
void GuardFree(void *buf)
{
btAlignedFree(buf);
}
#endif
#ifdef __APPLE__
uint64_t ReadTicks( void )
uint64_t ReadTicks(void)
{
return mach_absolute_time();
return mach_absolute_time();
}
double TicksToCycles( uint64_t delta )
double TicksToCycles(uint64_t delta)
{
static long double conversion = 0.0L;
if( 0.0L == conversion )
{
// attempt to get conversion to nanoseconds
mach_timebase_info_data_t info;
int err = mach_timebase_info( &info );
if( err )
return __builtin_nanf("");
conversion = (long double) info.numer / info.denom;
// attempt to get conversion to cycles
if( 0 == gReportNanoseconds )
{
uint64_t frequency = 0;
size_t freq_size = sizeof( frequency );
err = sysctlbyname( "hw.cpufrequency_max", &frequency, &freq_size, NULL, 0 );
if( err || 0 == frequency )
vlog( "Failed to get max cpu frequency. Reporting times as nanoseconds.\n" );
else
{
conversion *= 1e-9L /* sec / ns */ * frequency /* cycles / sec */;
vlog( "Reporting times as cycles. (%2.2f MHz)\n", 1e-6 * frequency );
}
}
else
vlog( "Reporting times as nanoseconds.\n" );
}
return (double) (delta * conversion);
static long double conversion = 0.0L;
if (0.0L == conversion)
{
// attempt to get conversion to nanoseconds
mach_timebase_info_data_t info;
int err = mach_timebase_info(&info);
if (err)
return __builtin_nanf("");
conversion = (long double)info.numer / info.denom;
// attempt to get conversion to cycles
if (0 == gReportNanoseconds)
{
uint64_t frequency = 0;
size_t freq_size = sizeof(frequency);
err = sysctlbyname("hw.cpufrequency_max", &frequency, &freq_size, NULL, 0);
if (err || 0 == frequency)
vlog("Failed to get max cpu frequency. Reporting times as nanoseconds.\n");
else
{
conversion *= 1e-9L /* sec / ns */ * frequency /* cycles / sec */;
vlog("Reporting times as cycles. (%2.2f MHz)\n", 1e-6 * frequency);
}
}
else
vlog("Reporting times as nanoseconds.\n");
}
return (double)(delta * conversion);
}
double TicksToSeconds( uint64_t delta )
double TicksToSeconds(uint64_t delta)
{
static long double conversion = 0.0L;
if( 0.0L == conversion )
{
// attempt to get conversion to nanoseconds
mach_timebase_info_data_t info;
int err = mach_timebase_info( &info );
if( err )
return __builtin_nanf("");
conversion = info.numer / (1e9L * info.denom);
}
return (double) (delta * conversion);
static long double conversion = 0.0L;
if (0.0L == conversion)
{
// attempt to get conversion to nanoseconds
mach_timebase_info_data_t info;
int err = mach_timebase_info(&info);
if (err)
return __builtin_nanf("");
conversion = info.numer / (1e9L * info.denom);
}
return (double)(delta * conversion);
}
#pragma mark -
#pragma mark GuardCalloc
#define kPageSize 4096
typedef struct BufInfo
{
void *head;
size_t count;
size_t stride;
size_t totalSize;
}BufInfo;
void *head;
size_t count;
size_t stride;
size_t totalSize;
} BufInfo;
static int GuardMarkBuffer( void *buffer, int flag );
static int GuardMarkBuffer(void *buffer, int flag);
void *GuardCalloc( size_t count, size_t size, size_t *objectStride )
void *GuardCalloc(size_t count, size_t size, size_t *objectStride)
{
if( objectStride )
*objectStride = 0;
// Round size up to a multiple of a page size
size_t stride = (size + kPageSize - 1) & -kPageSize;
//Calculate total size of the allocation
size_t totalSize = count * (stride + kPageSize) + kPageSize;
if (objectStride)
*objectStride = 0;
// Allocate
char *buf = (char*)mmap( NULL,
totalSize,
PROT_READ | PROT_WRITE,
MAP_ANON | MAP_SHARED,
0, 0 );
if( MAP_FAILED == buf )
{
vlog( "mmap failed: %d\n", errno );
return NULL;
}
// Round size up to a multiple of a page size
size_t stride = (size + kPageSize - 1) & -kPageSize;
// Find the first byte of user data
char *result = buf + kPageSize;
//Calculate total size of the allocation
size_t totalSize = count * (stride + kPageSize) + kPageSize;
// Record what we did for posterity
BufInfo *bptr = (BufInfo*) result - 1;
bptr->head = buf;
bptr->count = count;
bptr->stride = stride;
bptr->totalSize = totalSize;
// Place the first guard page. Masks our record above.
if( mprotect(buf, kPageSize, PROT_NONE) )
{
munmap( buf, totalSize);
vlog( "mprotect -1 failed: %d\n", errno );
return NULL;
}
// Place the rest of the guard pages
size_t i;
char *p = result;
for( i = 0; i < count; i++ )
{
p += stride;
if( mprotect(p, kPageSize, PROT_NONE) )
{
munmap( buf, totalSize);
vlog( "mprotect %lu failed: %d\n", i, errno );
return NULL;
}
p += kPageSize;
}
// record the stride from object to object
if( objectStride )
*objectStride = stride + kPageSize;
// return pointer to first object
return result;
// Allocate
char *buf = (char *)mmap(NULL,
totalSize,
PROT_READ | PROT_WRITE,
MAP_ANON | MAP_SHARED,
0, 0);
if (MAP_FAILED == buf)
{
vlog("mmap failed: %d\n", errno);
return NULL;
}
// Find the first byte of user data
char *result = buf + kPageSize;
// Record what we did for posterity
BufInfo *bptr = (BufInfo *)result - 1;
bptr->head = buf;
bptr->count = count;
bptr->stride = stride;
bptr->totalSize = totalSize;
// Place the first guard page. Masks our record above.
if (mprotect(buf, kPageSize, PROT_NONE))
{
munmap(buf, totalSize);
vlog("mprotect -1 failed: %d\n", errno);
return NULL;
}
// Place the rest of the guard pages
size_t i;
char *p = result;
for (i = 0; i < count; i++)
{
p += stride;
if (mprotect(p, kPageSize, PROT_NONE))
{
munmap(buf, totalSize);
vlog("mprotect %lu failed: %d\n", i, errno);
return NULL;
}
p += kPageSize;
}
// record the stride from object to object
if (objectStride)
*objectStride = stride + kPageSize;
// return pointer to first object
return result;
}
void GuardFree( void *buf )
void GuardFree(void *buf)
{
if( mprotect((char*)buf - kPageSize, kPageSize, PROT_READ) )
{
vlog( "Unable to read buf info. GuardFree failed! %p (%d)\n", buf, errno );
return;
}
BufInfo *bptr = (BufInfo*) buf - 1;
if( munmap( bptr->head, bptr->totalSize ) )
vlog( "Unable to unmap data. GuardFree failed! %p (%d)\n", buf, errno );
if (mprotect((char *)buf - kPageSize, kPageSize, PROT_READ))
{
vlog("Unable to read buf info. GuardFree failed! %p (%d)\n", buf, errno);
return;
}
BufInfo *bptr = (BufInfo *)buf - 1;
if (munmap(bptr->head, bptr->totalSize))
vlog("Unable to unmap data. GuardFree failed! %p (%d)\n", buf, errno);
}
int GuardMarkReadOnly( void *buf )
int GuardMarkReadOnly(void *buf)
{
return GuardMarkBuffer(buf, PROT_READ);
return GuardMarkBuffer(buf, PROT_READ);
}
int GuardMarkReadWrite( void *buf)
int GuardMarkReadWrite(void *buf)
{
return GuardMarkBuffer(buf, PROT_READ | PROT_WRITE);
return GuardMarkBuffer(buf, PROT_READ | PROT_WRITE);
}
int GuardMarkWriteOnly( void *buf)
int GuardMarkWriteOnly(void *buf)
{
return GuardMarkBuffer(buf, PROT_WRITE);
return GuardMarkBuffer(buf, PROT_WRITE);
}
static int GuardMarkBuffer( void *buf, int flag )
static int GuardMarkBuffer(void *buf, int flag)
{
if( mprotect((char*)buf - kPageSize, kPageSize, PROT_READ) )
{
vlog( "Unable to read buf info. GuardMarkBuffer %d failed! %p (%d)\n", flag, buf, errno );
return errno;
}
BufInfo *bptr = (BufInfo*) buf - 1;
size_t count = bptr->count;
size_t stride = bptr->stride;
size_t i;
for( i = 0; i < count; i++ )
{
if( mprotect(buf, stride, flag) )
{
vlog( "Unable to protect segment %ld. GuardMarkBuffer %d failed! %p (%d)\n", i, flag, buf, errno );
return errno;
}
bptr += stride + kPageSize;
}
if( mprotect((char*)buf - kPageSize, kPageSize, PROT_NONE) )
{
vlog( "Unable to protect leading guard page. GuardMarkBuffer %d failed! %p (%d)\n", flag, buf, errno );
return errno;
}
return 0;
if (mprotect((char *)buf - kPageSize, kPageSize, PROT_READ))
{
vlog("Unable to read buf info. GuardMarkBuffer %d failed! %p (%d)\n", flag, buf, errno);
return errno;
}
BufInfo *bptr = (BufInfo *)buf - 1;
size_t count = bptr->count;
size_t stride = bptr->stride;
size_t i;
for (i = 0; i < count; i++)
{
if (mprotect(buf, stride, flag))
{
vlog("Unable to protect segment %ld. GuardMarkBuffer %d failed! %p (%d)\n", i, flag, buf, errno);
return errno;
}
bptr += stride + kPageSize;
}
if (mprotect((char *)buf - kPageSize, kPageSize, PROT_NONE))
{
vlog("Unable to protect leading guard page. GuardMarkBuffer %d failed! %p (%d)\n", flag, buf, errno);
return errno;
}
return 0;
}
#endif
uint32_t random_number32(void)
{
return ((uint32_t) rand() << 16) ^ rand();
return ((uint32_t)rand() << 16) ^ rand();
}
uint64_t random_number64(void)
{
return ((uint64_t) rand() << 48) ^
((uint64_t) rand() << 32) ^
((uint64_t) rand() << 16) ^
rand();
return ((uint64_t)rand() << 48) ^
((uint64_t)rand() << 32) ^
((uint64_t)rand() << 16) ^
rand();
}

View File

@@ -10,63 +10,59 @@
#include "btIntDefines.h"
#include <stddef.h>
#include <stdio.h>
#ifdef _WIN32
#define LARGE_FLOAT17 (1.f * powf(2,17))
#define RANDF_16 (random_number32() * powf(2,-16))
#define RANDF_01 ( random_number32() * powf(2,-32) )
#define RANDF ( random_number32() * powf(2,-8) )
#define RANDF_m1p1 (2.0f*( random_number32() * powf(2,-32)-1.0f))
#define LARGE_FLOAT17 (1.f * powf(2, 17))
#define RANDF_16 (random_number32() * powf(2, -16))
#define RANDF_01 (random_number32() * powf(2, -32))
#define RANDF (random_number32() * powf(2, -8))
#define RANDF_m1p1 (2.0f * (random_number32() * powf(2, -32) - 1.0f))
#else
#define LARGE_FLOAT17 (0x1.0p17f)
#define RANDF_16 (random_number32() * 0x1.0p-16f)
#define RANDF_01 ( random_number32() * 0x1.0p-32f )
#define RANDF ( random_number32() * 0x1.0p-8f )
#define RANDF_m1p1 (2.0f*( random_number32() * 0x1.0p-32f )-1.0f)
#endif//_WIN32
#define RANDF_16 (random_number32() * 0x1.0p-16f)
#define RANDF_01 (random_number32() * 0x1.0p-32f)
#define RANDF (random_number32() * 0x1.0p-8f)
#define RANDF_m1p1 (2.0f * (random_number32() * 0x1.0p-32f) - 1.0f)
#endif //_WIN32
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
/*********************
/*********************
* Timing *
*********************/
extern int gReportNanoseconds;
extern int gReportNanoseconds;
uint64_t ReadTicks( void );
double TicksToCycles( uint64_t delta ); // Performance data should be reported in cycles most of the time.
double TicksToSeconds( uint64_t delta );
uint64_t ReadTicks(void);
double TicksToCycles(uint64_t delta); // Performance data should be reported in cycles most of the time.
double TicksToSeconds(uint64_t delta);
/*********************
/*********************
* Guard Heap *
*********************/
// return buffer containing count objects of size size, with guard pages in betweeen.
// The stride between one object and the next is given by objectStride.
// objectStride may be NULL. Objects so created are freed with GuardFree
void *GuardCalloc( size_t count, size_t size, size_t *objectStride );
void GuardFree( void * );
// mark the contents of a guard buffer read-only or write-only. Return 0 on success.
int GuardMarkReadOnly( void *);
int GuardMarkWriteOnly( void *);
int GuardMarkReadWrite( void *);
/*********************
// return buffer containing count objects of size size, with guard pages in betweeen.
// The stride between one object and the next is given by objectStride.
// objectStride may be NULL. Objects so created are freed with GuardFree
void *GuardCalloc(size_t count, size_t size, size_t *objectStride);
void GuardFree(void *);
// mark the contents of a guard buffer read-only or write-only. Return 0 on success.
int GuardMarkReadOnly(void *);
int GuardMarkWriteOnly(void *);
int GuardMarkReadWrite(void *);
/*********************
* Printing *
*********************/
#define vlog( ... ) printf( __VA_ARGS__ )
uint32_t random_number32(void);
uint64_t random_number64(void);
#define vlog(...) printf(__VA_ARGS__)
uint32_t random_number32(void);
uint64_t random_number64(void);
#ifdef __cplusplus
}
}
#endif
#endif

View File

@@ -3,17 +3,17 @@
#define BT_INT_DEFINES_H
#ifdef __GNUC__
#include <stdint.h>
#include <stdint.h>
#elif defined(_MSC_VER)
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
#else
typedef int int32_t;
typedef long long int int64_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;
typedef int int32_t;
typedef long long int int64_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;
#endif
#endif //BT_INT_DEFINES_H
#endif //BT_INT_DEFINES_H

View File

@@ -7,7 +7,7 @@
#include <stdio.h>
#ifdef __APPLE__
#include <libgen.h>
#endif //__APPLE__
#endif //__APPLE__
#include <string.h>
#include <stdlib.h>
@@ -17,7 +17,7 @@
#include "TestList.h"
#include "LinearMath/btScalar.h"
#if defined (BT_USE_NEON) || defined (BT_USE_SSE_IN_API)
#if defined(BT_USE_NEON) || defined(BT_USE_SSE_IN_API)
#ifdef _WIN32
#define strcasecmp _stricmp
@@ -37,284 +37,278 @@ const char **gArgv;
typedef struct TestNode
{
struct TestNode *next;
const char *name;
}TestNode;
struct TestNode *next;
const char *name;
} TestNode;
TestNode *gNodeList = NULL;
static int ParseArgs( int argc, const char *argv[] );
static void PrintUsage( void );
static int Init( void );
static void ListTests(void );
static int ParseArgs(int argc, const char *argv[]);
static void PrintUsage(void);
static int Init(void);
static void ListTests(void);
const char *gArch =
#ifdef __i386__
"i386";
const char *gArch =
#ifdef __i386__
"i386";
#elif defined __x86_64__
"x86_64";
"x86_64";
#elif defined __arm__
"arm";
"arm";
#elif defined _WIN64
"win64";
#elif defined _WIN32
"win32";
#else
#error unknown arch
#error unknown arch
#endif
#include <stdio.h>
// Test-runner entry point. Parses the command line, prints the architecture
// and the timing mode, calls Init(), then runs either every entry of gTestList
// (when no test names were given) or only the named tests, timing each one
// with ReadTicks(). A pass/fail count is printed at the end.
// Returns 0 when ParseArgs reports EXIT_NO_ERROR; otherwise the ParseArgs or
// Init error code, a non-zero test result, or -1 when a requested test name is
// not found. With gExitOnError set it returns at the first failure.
// This listing is a diff hunk: reformatted lines appear twice (old, then new).
int main (int argc, const char * argv[])
int main(int argc, const char *argv[])
{
// Enable just one test programmatically (instead of command-line param)
// TestNode *node = (TestNode*) malloc( sizeof( TestNode ) );
// node->name = "btDbvt";
// node->next = 0;
// gNodeList = node;
// Enable just one test programmatically (instead of command-line param)
// TestNode *node = (TestNode*) malloc( sizeof( TestNode ) );
// node->name = "btDbvt";
// node->next = 0;
// gNodeList = node;
// NOTE(review): srand() takes an unsigned int; the float literal 0.f is
// implicitly converted to 0 here.
srand(0.f);
int numPassedTests=0;
int numFailedTests= 0;
int numPassedTests = 0;
int numFailedTests = 0;
int err;
// Parse arguments. Build gNodeList.
if( (err = ParseArgs( argc, argv ) ) )
{
if( EXIT_NO_ERROR == err )
return 0;
PrintUsage();
return err;
}
printf("Arch: %s\n", gArch );
if( gReportAverageTimes )
printf( "Reporting average times.\n" );
else
printf( "Reporting best times.\n" );
// Set a few things up
if( (err = Init() ))
{
printf( "Init failed.\n" );
return err;
}
if( NULL == gNodeList )
{ // test everything
printf( "No function list found. Testing everything...\n" );
size_t i;
for( i = 0; NULL != gTestList[i].test_func; i++ )
{
printf( "\n----------------------------------------------\n" );
printf( "Testing %s:\n", gTestList[i].name );
printf( "----------------------------------------------\n" );
uint64_t startTime = ReadTicks();
int local_error = gTestList[i].test_func();
uint64_t currentTime = ReadTicks() - startTime;
if( local_error )
{
int err;
// Parse arguments. Build gNodeList.
if ((err = ParseArgs(argc, argv)))
{
// EXIT_NO_ERROR means ParseArgs asked for a clean exit (e.g. after -h).
if (EXIT_NO_ERROR == err)
return 0;
PrintUsage();
return err;
}
printf("Arch: %s\n", gArch);
if (gReportAverageTimes)
printf("Reporting average times.\n");
else
printf("Reporting best times.\n");
// Set a few things up
if ((err = Init()))
{
printf("Init failed.\n");
return err;
}
if (NULL == gNodeList)
{ // test everything
printf("No function list found. Testing everything...\n");
size_t i;
// gTestList is scanned until an entry with a NULL test_func is reached.
for (i = 0; NULL != gTestList[i].test_func; i++)
{
printf("\n----------------------------------------------\n");
printf("Testing %s:\n", gTestList[i].name);
printf("----------------------------------------------\n");
uint64_t startTime = ReadTicks();
int local_error = gTestList[i].test_func();
uint64_t currentTime = ReadTicks() - startTime;
if (local_error)
{
numFailedTests++;
printf( "*** %s test failed with error: %d\n", gTestList[i].name, local_error );
if( gExitOnError )
return local_error;
if( 0 == err )
err = local_error;
}
else
printf("*** %s test failed with error: %d\n", gTestList[i].name, local_error);
if (gExitOnError)
return local_error;
// Keep only the first failure code as the process result.
if (0 == err)
err = local_error;
}
else
{
numPassedTests++;
printf("%s Passed.\t\t\t(%2.2gs)\n", gTestList[i].name, TicksToSeconds(currentTime));
printf("%s Passed.\t\t\t(%2.2gs)\n", gTestList[i].name, TicksToSeconds(currentTime));
}
}
}
else
{ // test just the list
while( NULL != gNodeList )
{
TestNode *currentNode = gNodeList;
gNodeList = gNodeList->next;
// Find the test with that name
size_t i;
for( i = 0; NULL != gTestList[i].test_func; i++ )
if( 0 == strcasecmp( currentNode->name, gTestList[i].name ) )
break;
if( NULL != gTestList[i].test_func )
{
printf( "\n----------------------------------------------\n" );
printf( "Testing %s:\n", gTestList[i].name );
printf( "----------------------------------------------\n" );
uint64_t startTime = ReadTicks();
int local_error = gTestList[i].test_func();
uint64_t currentTime = ReadTicks() - startTime;
if( local_error )
{
}
}
else
{ // test just the list
// Each node is unlinked from gNodeList before its test runs and freed afterwards.
while (NULL != gNodeList)
{
TestNode *currentNode = gNodeList;
gNodeList = gNodeList->next;
// Find the test with that name
size_t i;
for (i = 0; NULL != gTestList[i].test_func; i++)
if (0 == strcasecmp(currentNode->name, gTestList[i].name))
break;
if (NULL != gTestList[i].test_func)
{
printf("\n----------------------------------------------\n");
printf("Testing %s:\n", gTestList[i].name);
printf("----------------------------------------------\n");
uint64_t startTime = ReadTicks();
int local_error = gTestList[i].test_func();
uint64_t currentTime = ReadTicks() - startTime;
if (local_error)
{
numFailedTests++;
printf( "*** %s test failed with error: %d\n", gTestList[i].name, local_error );
if( gExitOnError )
return local_error;
if( 0 == err )
err = local_error;
}
else
printf("*** %s test failed with error: %d\n", gTestList[i].name, local_error);
// NOTE(review): returning here skips free(currentNode) and free(gFullPath) below.
if (gExitOnError)
return local_error;
if (0 == err)
err = local_error;
}
else
{
numPassedTests++;
printf("%s Passed.\t\t\t(%2.2gs)\n", gTestList[i].name, TicksToSeconds(currentTime));
printf("%s Passed.\t\t\t(%2.2gs)\n", gTestList[i].name, TicksToSeconds(currentTime));
}
}
else
{
printf( "\n***Error: Test name \"%s\" not found! Skipping.\n", currentNode->name );
err = -1;
if( gExitOnError )
return -1;
}
free( currentNode );
}
}
printf( "\n----------------------------------------------\n" );
printf("numPassedTests = %d, numFailedTests = %d\n",numPassedTests,numFailedTests);
free(gFullPath);
return err;
}
else
{
// An unknown name overwrites any earlier error code with -1.
printf("\n***Error: Test name \"%s\" not found! Skipping.\n", currentNode->name);
err = -1;
if (gExitOnError)
return -1;
}
free(currentNode);
}
}
printf("\n----------------------------------------------\n");
printf("numPassedTests = %d, numFailedTests = %d\n", numPassedTests, numFailedTests);
// gFullPath is the copy of argv[0] allocated in ParseArgs.
free(gFullPath);
return err;
}
// One-time setup run by main() before any test: calls TicksToCycles(0) and
// always returns 0.
// (Diff hunk: signature and body are listed in old and new formatting.)
static int Init( void )
static int Init(void)
{
// init the timer
TicksToCycles(0);
return 0;
// init the timer
TicksToCycles(0);
return 0;
}
// Parses the command line. An argument starting with '-' is read as a run of
// single-letter flags; any other argument is taken as a test name and ends up
// on gNodeList. The function also stores argc/argv in gArgc/gArgv, copies
// argv[0] into the heap buffer gFullPath, derives gAppName from it with
// basename(), and echoes the program name and every argument to stdout.
// Returns 0 on success, EXIT_NO_ERROR after -h or -l, and -1 for an unknown
// flag.
// NOTE(review): both malloc() results are used without a NULL check, and the
// loop index `i` is a size_t compared against the int `argc`.
// This listing is a diff hunk: the whole body appears twice (old, then new).
static int ParseArgs( int argc, const char *argv[] )
static int ParseArgs(int argc, const char *argv[])
{
int listTests = 0;
TestNode *list = NULL;
gArgc = argc;
gArgv = argv;
gFullPath = (char*)malloc( strlen(argv[0]) + 1);
strcpy(gFullPath, argv[0]);
gAppName = basename( gFullPath );
if( NULL == gAppName )
gAppName = "<unknown app name>";
printf( "%s ", gAppName );
int skipremaining=0;
size_t i;
for( i = 1; i < argc; i++ )
{
const char *arg = argv[i];
printf( "\t%s", arg );
if( arg[0] == '-' )
{
arg++;
while( arg[0] != '\0' )
{
int stop = 0;
switch( arg[0] )
{
case 'a':
gReportAverageTimes ^= 1;
break;
case 'e':
gExitOnError ^= 1;
break;
case 'h':
PrintUsage();
return EXIT_NO_ERROR;
case 'l':
listTests ^= 1;
return EXIT_NO_ERROR;
case 's':
gReportNanoseconds ^= 1;
break;
case ' ':
stop = 1;
break;
case 'N'://ignore the -NSDocumentRevisionsDebugMode argument from XCode 4.3.2
skipremaining = 1;
stop = 1;
break;
default:
printf( "\nError: Unknown flag \'%c\'\n", arg[0] );
return -1;
}
if( stop )
break;
arg++;
}
}
else
{ // add function name to the list
TestNode *node = (TestNode*) malloc( sizeof( TestNode ) );
node->name = arg;
node->next = list;
list = node;
}
if (skipremaining)
break;
}
// reverse the list of test names, and stick on gNodeList
while( list )
{
TestNode *node = list;
TestNode *next = node->next;
node->next = gNodeList;
gNodeList = node;
list = next;
}
printf( "\n" );
if( listTests )
ListTests();
return 0;
int listTests = 0;
TestNode *list = NULL;
gArgc = argc;
gArgv = argv;
// basename() is given a private copy of argv[0]; main() frees it on its normal exit path.
gFullPath = (char *)malloc(strlen(argv[0]) + 1);
strcpy(gFullPath, argv[0]);
gAppName = basename(gFullPath);
if (NULL == gAppName)
gAppName = "<unknown app name>";
printf("%s ", gAppName);
int skipremaining = 0;
size_t i;
for (i = 1; i < argc; i++)
{
const char *arg = argv[i];
printf("\t%s", arg);
if (arg[0] == '-')
{
arg++;
// Walk the flag letters that follow the '-' one at a time.
while (arg[0] != '\0')
{
int stop = 0;
switch (arg[0])
{
case 'a':
gReportAverageTimes ^= 1;
break;
case 'e':
gExitOnError ^= 1;
break;
case 'h':
PrintUsage();
return EXIT_NO_ERROR;
// NOTE(review): this case returns right after toggling listTests, so the
// `if (listTests) ListTests();` at the end of the function is never reached
// for -l -- confirm whether a `break` was intended.
case 'l':
listTests ^= 1;
return EXIT_NO_ERROR;
case 's':
gReportNanoseconds ^= 1;
break;
case ' ':
stop = 1;
break;
case 'N': //ignore the -NSDocumentRevisionsDebugMode argument from XCode 4.3.2
// Stops parsing of this argument and of every argument after it.
skipremaining = 1;
stop = 1;
break;
default:
printf("\nError: Unknown flag \'%c\'\n", arg[0]);
return -1;
}
if (stop)
break;
arg++;
}
}
else
{ // add function name to the list
// Names are pushed on the front of `list`, i.e. in reverse order.
TestNode *node = (TestNode *)malloc(sizeof(TestNode));
node->name = arg;
node->next = list;
list = node;
}
if (skipremaining)
break;
}
// reverse the list of test names, and stick on gNodeList
while (list)
{
TestNode *node = list;
TestNode *next = node->next;
node->next = gNodeList;
gNodeList = node;
list = next;
}
printf("\n");
if (listTests)
ListTests();
return 0;
}
// Prints the command-line help text (synopsis built from gAppName, then one
// line per option) to stdout.
// NOTE(review): the "<test names>" synopsis line has no trailing "\n", so
// "Options:" is printed on the same output line.
// (Diff hunk: signature and body are listed in old and new formatting.)
static void PrintUsage( void )
static void PrintUsage(void)
{
printf("\nUsage:\n" );
printf("%s: <-aehls> <test names>", gAppName);
printf("Options:\n");
printf("\t-a\tToggle report average times vs. best times. (Default: best times)\n");
printf("\t-e\tToggle exit immediately on error behavior. (Default: off)\n");
printf("\t-h\tPrint this message.\n");
printf("\t-l\tToggle list available test names. (Default: off)\n");
printf("\t-s\tToggle report times in cycles or nanoseconds. (Default: cycles)\n\n");
printf("\tOptions may be followed by one or more test names. If no test names \n" );
printf("\tare provided, then all tests are run.\n\n");
printf("\nUsage:\n");
printf("%s: <-aehls> <test names>", gAppName);
printf("Options:\n");
printf("\t-a\tToggle report average times vs. best times. (Default: best times)\n");
printf("\t-e\tToggle exit immediately on error behavior. (Default: off)\n");
printf("\t-h\tPrint this message.\n");
printf("\t-l\tToggle list available test names. (Default: off)\n");
printf("\t-s\tToggle report times in cycles or nanoseconds. (Default: cycles)\n\n");
printf("\tOptions may be followed by one or more test names. If no test names \n");
printf("\tare provided, then all tests are run.\n\n");
}
// Prints the name of every gTestList entry (up to the first entry whose
// test_func is NULL), right-aligned in a 19-character field, four per line.
// NOTE(review): the inner NULL check repeats the loop condition for the same
// index, so it is always true and a comma follows every name, including the
// last one -- the next entry may have been meant.
// (Diff hunk: signature and body are listed in old and new formatting.)
static void ListTests(void )
static void ListTests(void)
{
size_t i;
printf("\nTests:\n");
for( i = 0; NULL != gTestList[i].test_func; i++ )
{
printf( "%19s", gTestList[i].name );
if( NULL != gTestList[i].test_func )
printf( "," );
if( 3 == (i&3) )
printf( "\n" );
}
size_t i;
printf("\nTests:\n");
for (i = 0; NULL != gTestList[i].test_func; i++)
{
printf("%19s", gTestList[i].name);
if (NULL != gTestList[i].test_func)
printf(",");
// Line break after every fourth name (indices 3, 7, 11, ...).
if (3 == (i & 3))
printf("\n");
}
}
#else
#include <stdio.h>

View File

@@ -9,17 +9,16 @@
// Declarations of the test runner's global settings, wrapped in extern "C"
// when compiled as C++.
// (Diff hunk: reformatted lines appear twice, old then new.)
#define BulletTest_main_h
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
extern int gReportAverageTimes; // if 0, report best times
extern int gExitOnError; // if non-zero, exit as soon as an error is encountered
extern const char *gAppName; // the name of this application
extern int gReportAverageTimes; // if 0, report best times
extern int gExitOnError; // if non-zero, exit as soon as an error is encountered
extern const char *gAppName; // the name of this application
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -9,62 +9,61 @@
// 16-byte SIMD vector typedefs (declared with the __vector_size__(16)
// attribute) plus the matching intrinsics headers, selected by the
// instruction-set macros the compiler predefines: one header per SSE level on
// x86, and <arm_neon.h> when __arm__ and _ARM_ARCH_7 are both defined.
// long2/ulong2 are built from `long` under __LP64__ and from `long long`
// otherwise.
// This listing is a diff hunk: reformatted lines appear twice (old, then new).
#define BulletTest_vector_h
#ifdef __SSE__
typedef float float4 __attribute__ ((__vector_size__(16)));
#include <xmmintrin.h>
typedef float float4 __attribute__((__vector_size__(16)));
#include <xmmintrin.h>
#endif
#ifdef __SSE2__
typedef double double2 __attribute__ ((__vector_size__(16)));
typedef char char16 __attribute__ ((__vector_size__(16)));
typedef unsigned char uchar16 __attribute__ ((__vector_size__(16)));
typedef short short8 __attribute__ ((__vector_size__(16)));
typedef unsigned short ushort8 __attribute__ ((__vector_size__(16)));
typedef int int4 __attribute__ ((__vector_size__(16)));
// typedef unsigned int uint4 __attribute__ ((__vector_size__(16)));
#ifdef __LP64__
typedef long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long ulong2 __attribute__ ((__vector_size__(16)));
#else
typedef long long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long long ulong2 __attribute__ ((__vector_size__(16)));
#endif
#include <emmintrin.h>
typedef double double2 __attribute__((__vector_size__(16)));
typedef char char16 __attribute__((__vector_size__(16)));
typedef unsigned char uchar16 __attribute__((__vector_size__(16)));
typedef short short8 __attribute__((__vector_size__(16)));
typedef unsigned short ushort8 __attribute__((__vector_size__(16)));
typedef int int4 __attribute__((__vector_size__(16)));
// NOTE(review): uint4 is left commented out in this SSE2 branch but is defined
// in the ARM branch below; the reason is not visible here.
// typedef unsigned int uint4 __attribute__ ((__vector_size__(16)));
#ifdef __LP64__
typedef long long2 __attribute__((__vector_size__(16)));
typedef unsigned long ulong2 __attribute__((__vector_size__(16)));
#else
typedef long long long2 __attribute__((__vector_size__(16)));
typedef unsigned long long ulong2 __attribute__((__vector_size__(16)));
#endif
#include <emmintrin.h>
#endif
#ifdef __SSE3__
#include <pmmintrin.h>
#include <pmmintrin.h>
#endif
#ifdef __SSSE3__
#include <tmmintrin.h>
#include <tmmintrin.h>
#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
#include <smmintrin.h>
#endif
#ifdef __arm__
#include <arm/arch.h>
#ifdef _ARM_ARCH_7
#define ARM_NEON_GCC_COMPATIBILITY 1
#include <arm_neon.h>
typedef float float4 __attribute__ ((__vector_size__(16)));
typedef double double2 __attribute__ ((__vector_size__(16)));
typedef char char16 __attribute__ ((__vector_size__(16)));
typedef unsigned char uchar16 __attribute__ ((__vector_size__(16)));
typedef short short8 __attribute__ ((__vector_size__(16)));
typedef unsigned short ushort8 __attribute__ ((__vector_size__(16)));
typedef int int4 __attribute__ ((__vector_size__(16)));
typedef unsigned int uint4 __attribute__ ((__vector_size__(16)));
#ifdef __LP64__
typedef long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long ulong2 __attribute__ ((__vector_size__(16)));
#else
typedef long long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long long ulong2 __attribute__ ((__vector_size__(16)));
#endif
#endif
#include <arm/arch.h>
#ifdef _ARM_ARCH_7
#define ARM_NEON_GCC_COMPATIBILITY 1
#include <arm_neon.h>
typedef float float4 __attribute__((__vector_size__(16)));
typedef double double2 __attribute__((__vector_size__(16)));
typedef char char16 __attribute__((__vector_size__(16)));
typedef unsigned char uchar16 __attribute__((__vector_size__(16)));
typedef short short8 __attribute__((__vector_size__(16)));
typedef unsigned short ushort8 __attribute__((__vector_size__(16)));
typedef int int4 __attribute__((__vector_size__(16)));
typedef unsigned int uint4 __attribute__((__vector_size__(16)));
#ifdef __LP64__
typedef long long2 __attribute__((__vector_size__(16)));
typedef unsigned long ulong2 __attribute__((__vector_size__(16)));
#else
typedef long long long2 __attribute__((__vector_size__(16)));
typedef unsigned long long ulong2 __attribute__((__vector_size__(16)));
#endif
#endif
#endif
#endif