Apple contribution for OSX SSE and iOS NEON optimizations unit tests, thanks to Jordan Hubbard, Ian Ollmann and Hristo Hristov.

For OSX:
cd build
./premake_osx xcode4
For iOS:
cd build
./ios_build.sh
./ios_run.sh

Also integrated the branches/StackAllocation to make it easier to multi-thread collision detection in the near future. It avoids changing the btCollisionObject while performing collision detection.

As this is a large patch, some things might be temporarily broken; I'll keep an eye out for issues.
This commit is contained in:
erwin.coumans
2012-06-07 00:56:30 +00:00
parent 777b92a2ad
commit 73b217fb07
323 changed files with 30730 additions and 13635 deletions

97
Test/Source/TestList.cpp Normal file
View File

@@ -0,0 +1,97 @@
//
// TestList.c
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include <stdlib.h>
#include "TestList.h"
#include "Test_qtmul.h"
#include "Test_qtmulQV3.h"
#include "Test_qtmulV3Q.h"
#include "Test_qtdot.h"
#include "Test_qtnorm.h"
#include "Test_v3dot.h"
#include "Test_v3sdiv.h"
#include "Test_v3norm.h"
#include "Test_v3cross.h"
#include "Test_v3triple.h"
#include "Test_v3interp.h"
#include "Test_v3lerp.h"
#include "Test_v3skew.h"
#include "Test_v3div.h"
#include "Test_v3rotate.h"
#include "Test_maxdot.h"
#include "Test_mindot.h"
#include "Test_dot3.h"
#include "Test_3x3transpose.h"
#include "Test_3x3transposeTimes.h"
#include "Test_3x3timesTranspose.h"
#include "Test_3x3mulM.h"
#include "Test_3x3mulM1M2.h"
#include "Test_3x3mulMV.h"
#include "Test_3x3mulVM.h"
#include "Test_3x3setRot.h"
#include "Test_3x3getRot.h"
#include "Test_btDbvt.h"
#include "Test_quat_aos_neon.h"
#include "LinearMath/btScalar.h"
#define ENTRY( _name, _func ) { _name, _func }
//
// Test functions have the form int (*TestFunc)( void )
// They return a non-zero result in case of failure.
//
// Please see handy stuff in Utils.h, vector.h when writing your test code.
//
// gTestList pairs every test name with its entry point and is terminated by
// a { NULL, NULL } sentinel. When neither BT_USE_NEON nor BT_USE_SSE_IN_API
// is defined, the table contains only the sentinel.
//
#if defined (BT_USE_NEON) || defined (BT_USE_SSE_IN_API)
TestDesc gTestList[] =
{
    ENTRY( "maxdot", Test_maxdot ),
    ENTRY( "mindot", Test_mindot ),
    ENTRY( "qtmul", Test_qtmul ),
    ENTRY( "qtmulQV3", Test_qtmulQV3 ),
    ENTRY( "qtmulV3Q", Test_qtmulV3Q ),
    ENTRY( "qtdot", Test_qtdot ),
    ENTRY( "qtnorm", Test_qtnorm ),
    ENTRY( "v3dot", Test_v3dot ),
    ENTRY( "v3sdiv", Test_v3sdiv ),
    ENTRY( "v3norm", Test_v3norm ),
    ENTRY( "v3cross", Test_v3cross ),
    ENTRY( "v3triple", Test_v3triple ),
    ENTRY( "v3interp", Test_v3interp ),
    ENTRY( "v3lerp", Test_v3lerp ),
    ENTRY( "v3skew", Test_v3skew ),
    ENTRY( "v3div", Test_v3div ),
    ENTRY( "v3rotate", Test_v3rotate ),
    ENTRY( "dot3", Test_dot3 ),
    ENTRY( "3x3transpose", Test_3x3transpose ),
    ENTRY( "3x3transposeTimes", Test_3x3transposeTimes ),
    ENTRY( "3x3timesTranspose", Test_3x3timesTranspose ),
    ENTRY( "3x3mulM", Test_3x3mulM ),
    ENTRY( "3x3mulM1M2", Test_3x3mulM1M2 ),
    ENTRY( "3x3mulMV", Test_3x3mulMV ),
    // Each name must be bound to its own test function; the vector*matrix
    // test is Test_3x3mulVM, not Test_3x3mulMV.
    ENTRY( "3x3mulVM", Test_3x3mulVM ),
    ENTRY( "3x3setRot", Test_3x3setRot ),
    ENTRY( "3x3getRot", Test_3x3getRot ),
    ENTRY( "btDbvt", Test_btDbvt ),
    ENTRY("quat_aos_neon", Test_quat_aos_neon),
    { NULL, NULL }
};
#else
TestDesc gTestList[]={{NULL,NULL}};
#endif

28
Test/Source/TestList.h Normal file
View File

@@ -0,0 +1,28 @@
//
// TestList.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_TestList_h
#define BulletTest_TestList_h
#ifdef __cplusplus
extern "C" {
#endif
// One entry of the test table: a test's name together with its entry point.
struct TestDesc
{
    const char *name;          // name string identifying the test
    int (*test_func)(void);    // return 0 for success, non-zero for failure
};
typedef struct TestDesc TestDesc;
extern TestDesc gTestList[];
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,158 @@
//
// Test_3x3getRot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3getRot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
static inline btSimdFloat4 rand_f4(void)
{
    // Three RANDF_m1p1 samples followed by a NaN in the w channel.
    const btSimdFloat4 sample = btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN );
    return sample;
}
static inline btSimdFloat4 qtNAN_f4(void)
{
    // All four channels are NaN.
    const btSimdFloat4 allNaN = btAssign128( BT_NAN, BT_NAN, BT_NAN, BT_NAN );
    return allNaN;
}
static void M3x3getRot_ref( const btMatrix3x3 &m, btQuaternion &q )
{
btVector3 m_el[3] = { m[0], m[1], m[2] };
btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
btScalar temp[4];
if (trace > btScalar(0.0))
{
btScalar s = btSqrt(trace + btScalar(1.0));
temp[3]=(s * btScalar(0.5));
s = btScalar(0.5) / s;
temp[0]=((m_el[2].y() - m_el[1].z()) * s);
temp[1]=((m_el[0].z() - m_el[2].x()) * s);
temp[2]=((m_el[1].x() - m_el[0].y()) * s);
}
else
{
int i = m_el[0].x() < m_el[1].y() ?
(m_el[1].y() < m_el[2].z() ? 2 : 1) :
(m_el[0].x() < m_el[2].z() ? 2 : 0);
int j = (i + 1) % 3;
int k = (i + 2) % 3;
btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
temp[i] = s * btScalar(0.5);
s = btScalar(0.5) / s;
temp[3] = (m_el[k][j] - m_el[j][k]) * s;
temp[j] = (m_el[j][i] + m_el[i][j]) * s;
temp[k] = (m_el[k][i] + m_el[i][k]) * s;
}
q.setValue(temp[0],temp[1],temp[2],temp[3]);
}
// Returns 1 when the summed absolute component differences of the two
// quaternions exceed 4 * FLT_EPSILON, otherwise 0.
static int operator!= ( const btQuaternion &a, const btQuaternion &b )
{
    const bool differs = ( fabs(a.x() - b.x()) +
                           fabs(a.y() - b.y()) +
                           fabs(a.z() - b.z()) +
                           fabs(a.w() - b.w()) > FLT_EPSILON * 4 );
    return differs ? 1 : 0;
}
// Compares btMatrix3x3::getRotation() with the scalar reference
// M3x3getRot_ref() on ARRAY_SIZE random matrices, then times both
// implementations over LOOPCOUNT passes and logs the result (best pass, or
// the average when gReportAverageTimes is non-zero).
// Returns 0 on success and -1 on the first mismatching element.
int Test_3x3getRot(void)
{
    // Inputs and outputs are plain stack arrays.
    btMatrix3x3 in1[ARRAY_SIZE];
    btQuaternion out[ARRAY_SIZE];
    btQuaternion out2[ARRAY_SIZE];

    // Init the data and check each element as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        // Both outputs start out as all-NaN before being computed.
        out[i] = btQuaternion(qtNAN_f4());
        out2[i] = btQuaternion(qtNAN_f4());

        M3x3getRot_ref(in1[i], out[i]);
        in1[i].getRotation(out2[i]);

        if( out[i] != out2[i] )
        {
            vlog( "Error - M3x3getRot result error! ");
            // size_t is not guaranteed to match %ld, so convert explicitly.
            vlog( "failure @ %ld\n", (long)i);
            vlog( "\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) \n",
                  out[i].x(), out[i].y(), out[i].z(), out[i].w(),
                  out2[i].x(), out2[i].y(), out2[i].z(), out2[i].w());
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Start from the largest uint64_t so the first sample always replaces it
    // (computed without reading the still-uninitialized bestTime).
    bestTime = ~uint64_t(0);
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            M3x3getRot_ref(in1[i], out[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    bestTime = ~uint64_t(0);
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
        {
            in1[i].getRotation(out2[i]);
        }
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif//BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3getRot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3getRot_h
#define BulletTest_Test_3x3getRot_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the 3x3 getRotation test. Returns 0 on success, non-zero on failure.
int Test_3x3getRot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,169 @@
//
// Test_3x3mulM.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3mulM.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
static inline btSimdFloat4 rand_f4(void)
{
    // Three RANDF_01 samples followed by a NaN in the w channel.
    const btSimdFloat4 sample = btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN );
    return sample;
}
// Scalar reference for 'in *= m': overwrites 'in' with the product and
// returns a copy of the updated matrix.
static btMatrix3x3 M3x3mulM_ref( btMatrix3x3 &in, const btMatrix3x3 &m )
{
    // Copy the rows first, because setValue() below overwrites 'in'.
    btVector3 row0 = in[0];
    btVector3 row1 = in[1];
    btVector3 row2 = in[2];

    in.setValue(
        m.tdotx(row0), m.tdoty(row0), m.tdotz(row0),
        m.tdotx(row1), m.tdoty(row1), m.tdotz(row1),
        m.tdotx(row2), m.tdoty(row2), m.tdotz(row2));
    return in;
}
// True when every one of the four lanes of 'ref' and 'other' differs by at
// most SIMD_EPSILON.
static SIMD_FORCE_INLINE bool fuzzyEqualSlow(const btVector3& ref, const btVector3& other)
{
    const btScalar epsilon = SIMD_EPSILON;
    // Lanes are visited from 3 down to 0, stopping at the first failing lane.
    for (int lane = 3; lane >= 0; --lane)
    {
        if (!(btFabs(ref.m_floats[lane]-other.m_floats[lane])<=epsilon))
            return false;
    }
    return true;
}
// Returns 1 if any row pair is neither exactly equal nor fuzzily equal
// (see fuzzyEqualSlow), otherwise 0.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    for (int row = 0; row < 3; ++row)
    {
        // The exact comparison runs first; the fuzzy one only on a mismatch.
        if( (a.getRow(row) != b.getRow(row)) &&
            !fuzzyEqualSlow(a.getRow(row), b.getRow(row)) )
        {
            return 1;
        }
    }
    return 0;
}
// Compares btMatrix3x3::operator*= with the scalar reference M3x3mulM_ref()
// on ARRAY_SIZE random matrix pairs, then times both implementations over
// LOOPCOUNT passes and logs the result (best pass, or the average when
// gReportAverageTimes is non-zero).
// Returns 0 on success and -1 on the first mismatching element.
int Test_3x3mulM(void)
{
    // Inputs and outputs are plain stack arrays.
    btMatrix3x3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btMatrix3x3 in3[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];

    // Init the data and check each element as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        // Both implementations modify their left operand in place, so the
        // tested one gets its own copy of the input.
        in3[i] = in1[i];

        out[i] = M3x3mulM_ref(in1[i], in2[i]);
        out2[i] = (in3[i] *= in2[i]);

        if( out[i] != out2[i] )
        {
            vlog( "Error - M3x3mulM result error! ");
            // size_t is not guaranteed to match %ld, so convert explicitly.
            vlog( "failure @ %ld\n", (long)i);
            btVector3 m0, m1, m2;
            m0 = out[i].getRow(0);
            m1 = out[i].getRow(1);
            m2 = out[i].getRow(2);
            vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            m0 = out2[i].getRow(0);
            m1 = out2[i].getRow(1);
            m2 = out2[i].getRow(2);
            vlog( "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // -1 converts to the largest uint64_t, so the first sample replaces it.
    bestTime = -1LL;
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        // Note: in1[i] keeps being multiplied in place on every pass.
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3mulM_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        // Note: in3[i] keeps being multiplied in place on every pass.
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = (in3[i] *= in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3mulM.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3mulM_h
#define BulletTest_Test_3x3mulM_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the 3x3 matrix *= matrix test. Returns 0 on success, non-zero on failure.
int Test_3x3mulM(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,164 @@
//
// Test_3x3mulM1M2.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3mulM1M2.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
static inline btSimdFloat4 rand_f4(void)
{
    // x, y, z come from RANDF_01; the w channel is NaN.
    const btSimdFloat4 lanes = btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN );
    return lanes;
}
// Scalar reference for 'm1 * m2': each result element is a row of m1
// combined with m2 through tdotx/tdoty/tdotz.
static btMatrix3x3 M3x3mulM1M2_ref( const btMatrix3x3 &m1, const btMatrix3x3 &m2 )
{
    const btVector3 &row0 = m1[0];
    const btVector3 &row1 = m1[1];
    const btVector3 &row2 = m1[2];

    btMatrix3x3 product(
        m2.tdotx(row0), m2.tdoty(row0), m2.tdotz(row0),
        m2.tdotx(row1), m2.tdoty(row1), m2.tdotz(row1),
        m2.tdotx(row2), m2.tdoty(row2), m2.tdotz(row2));
    return product;
}
// True when all four lanes of 'ref' and 'other' are within SIMD_EPSILON of
// each other; lanes are tested in the order 3, 2, 1, 0.
static bool fuzzyEqualSlow(const btVector3& ref, const btVector3& other)
{
    const btScalar epsilon = SIMD_EPSILON;

    if (!(btFabs(ref.m_floats[3]-other.m_floats[3])<=epsilon))
        return false;
    if (!(btFabs(ref.m_floats[2]-other.m_floats[2])<=epsilon))
        return false;
    if (!(btFabs(ref.m_floats[1]-other.m_floats[1])<=epsilon))
        return false;
    return (btFabs(ref.m_floats[0]-other.m_floats[0])<=epsilon);
}
// Returns 1 as soon as a row pair fails both the exact comparison and the
// fuzzyEqualSlow() comparison; returns 0 when all three rows match.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    int row = 0;
    while (row < 3)
    {
        const bool exactMismatch = ( a.getRow(row) != b.getRow(row) );
        if( exactMismatch && !fuzzyEqualSlow(a.getRow(row), b.getRow(row)) )
            return 1;
        ++row;
    }
    return 0;
}
// Compares btMatrix3x3 'a * b' with the scalar reference M3x3mulM1M2_ref()
// on ARRAY_SIZE random matrix pairs, then times both implementations over
// LOOPCOUNT passes and logs the result (best pass, or the average when
// gReportAverageTimes is non-zero).
// Returns 0 on success and -1 on the first mismatching element.
int Test_3x3mulM1M2(void)
{
    // Inputs and outputs are plain stack arrays.
    btMatrix3x3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];

    // Init the data and check each element as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = M3x3mulM1M2_ref(in1[i], in2[i]);
        out2[i] = (in1[i] * in2[i]);

        if( out[i] != out2[i] )
        {
            vlog( "Error - M3x3mulM1M2 result error! ");
            // size_t is not guaranteed to match %ld, so convert explicitly.
            vlog( "failure @ %ld\n", (long)i);
            btVector3 m0, m1, m2;
            m0 = out[i].getRow(0);
            m1 = out[i].getRow(1);
            m2 = out[i].getRow(2);
            vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            m0 = out2[i].getRow(0);
            m1 = out2[i].getRow(1);
            m2 = out2[i].getRow(2);
            vlog( "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // -1 converts to the largest uint64_t, so the first sample replaces it.
    bestTime = -1LL;
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3mulM1M2_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = (in1[i] * in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3mulM1M2.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3mulM1M2_h
#define BulletTest_Test_3x3mulM1M2_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the 3x3 matrix * matrix test. Returns 0 on success, non-zero on failure.
int Test_3x3mulM1M2(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,112 @@
//
// Test_3x3mulMV.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3mulMV.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
static inline btSimdFloat4 rand_f4(void)
{
    // RANDF_01 for the first three channels, NaN for w.
    const btSimdFloat4 value = btAssign128(RANDF_01, RANDF_01, RANDF_01, BT_NAN );
    return value;
}
// Scalar reference for 'm * v': each component is the dot product of one
// row of 'm' with 'v'.
static btVector3 M3x3mulMV_ref( const btMatrix3x3 &m, const btVector3 &v )
{
    const btScalar rx = m[0].dot(v);
    const btScalar ry = m[1].dot(v);
    const btScalar rz = m[2].dot(v);
    return btVector3(rx, ry, rz);
}
// Compares btMatrix3x3 * btVector3 with the scalar reference
// M3x3mulMV_ref() on ARRAY_SIZE random inputs, then times both
// implementations over LOOPCOUNT passes and logs the result (best pass, or
// the average when gReportAverageTimes is non-zero).
// Returns 0 on success and 1 on the first mismatching element.
int Test_3x3mulMV(void)
{
    // Inputs and outputs are plain stack arrays.
    btMatrix3x3 in1[ARRAY_SIZE];
    btVector3 in2[ARRAY_SIZE];
    btVector3 out[ARRAY_SIZE];
    btVector3 out2[ARRAY_SIZE];

    // Init the data and check each element as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btVector3(rand_f4());

        out[i] = M3x3mulMV_ref(in1[i], in2[i]);
        out2[i] = (in1[i] * in2[i]);

        // Summed absolute difference over all four lanes vs. 4 * FLT_EPSILON
        if( fabsf(out[i].m_floats[0] - out2[i].m_floats[0]) +
            fabsf(out[i].m_floats[1] - out2[i].m_floats[1]) +
            fabsf(out[i].m_floats[2] - out2[i].m_floats[2]) +
            fabsf(out[i].m_floats[3] - out2[i].m_floats[3]) > FLT_EPSILON*4 )
        {
            vlog( "Error - M3x3mulMV result error! ");
            // size_t is not guaranteed to match %ld, so convert explicitly.
            vlog( "failure @ %ld\n", (long)i);
            vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  out[i].m_floats[0], out[i].m_floats[1], out[i].m_floats[2], out[i].m_floats[3],
                  out2[i].m_floats[0], out2[i].m_floats[1], out2[i].m_floats[2], out2[i].m_floats[3]);
            return 1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // -1 converts to the largest uint64_t, so the first sample replaces it.
    bestTime = -1LL;
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3mulMV_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = (in1[i] * in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,23 @@
//
// Test_3x3mulMV.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3mulMV_h
#define BulletTest_Test_3x3mulMV_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the 3x3 matrix * vector test. Returns 0 on success, non-zero on failure.
int Test_3x3mulMV(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,112 @@
//
// Test_3x3mulVM.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3mulVM.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
static inline btSimdFloat4 rand_f4(void)
{
    // Random x, y, z from RANDF_01 with the w channel set to NaN.
    const btSimdFloat4 packed = btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN );
    return packed;
}
// Scalar reference for 'v * m', built from m.tdotx/tdoty/tdotz applied to 'v'.
static btVector3 M3x3mulVM_ref( const btVector3 &v, const btMatrix3x3 &m)
{
    const btScalar rx = m.tdotx(v);
    const btScalar ry = m.tdoty(v);
    const btScalar rz = m.tdotz(v);
    return btVector3(rx, ry, rz);
}
// Compares btVector3 * btMatrix3x3 with the scalar reference
// M3x3mulVM_ref() on ARRAY_SIZE random inputs, then times both
// implementations over LOOPCOUNT passes and logs the result (best pass, or
// the average when gReportAverageTimes is non-zero).
// Returns 0 on success and 1 on the first mismatching element.
int Test_3x3mulVM(void)
{
    // Inputs and outputs are plain stack arrays.
    btVector3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btVector3 out[ARRAY_SIZE];
    btVector3 out2[ARRAY_SIZE];

    // Init the data and check each element as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btVector3(rand_f4());
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = M3x3mulVM_ref(in1[i], in2[i]);
        out2[i] = (in1[i] * in2[i]);

        // Summed absolute difference over all four lanes vs. 4 * FLT_EPSILON
        if( fabsf(out[i].m_floats[0] - out2[i].m_floats[0]) +
            fabsf(out[i].m_floats[1] - out2[i].m_floats[1]) +
            fabsf(out[i].m_floats[2] - out2[i].m_floats[2]) +
            fabsf(out[i].m_floats[3] - out2[i].m_floats[3]) > FLT_EPSILON*4 )
        {
            vlog( "Error - M3x3mulVM result error! ");
            // size_t is not guaranteed to match %ld, so convert explicitly.
            vlog( "failure @ %ld\n", (long)i);
            vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  out[i].m_floats[0], out[i].m_floats[1], out[i].m_floats[2], out[i].m_floats[3],
                  out2[i].m_floats[0], out2[i].m_floats[1], out2[i].m_floats[2], out2[i].m_floats[3]);
            return 1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // -1 converts to the largest uint64_t, so the first sample replaces it.
    bestTime = -1LL;
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3mulVM_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = (in1[i] * in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3mulVM.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3mulVM_h
#define BulletTest_Test_3x3mulVM_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the vector * 3x3 matrix test. Returns 0 on success, non-zero on failure.
int Test_3x3mulVM(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,171 @@
//
// Test_3x3setRot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3setRot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
static inline btSimdFloat4 rand_f4(void)
{
    // x, y, z from RANDF_01; w channel is NaN.
    const btSimdFloat4 row = btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN );
    return row;
}
static inline btSimdFloat4 qtrand_f4(void)
{
    // All four channels come from RANDF_01 (used to seed quaternions).
    const btSimdFloat4 quatLanes = btAssign128( RANDF_01, RANDF_01, RANDF_01, RANDF_01 );
    return quatLanes;
}
// Scalar reference for btMatrix3x3::setRotation(): writes the matrix built
// from quaternion 'q' into 'm' and returns a copy of it.
static btMatrix3x3 M3x3setRot_ref( btMatrix3x3 &m, const btQuaternion &q )
{
    const btScalar lenSq = q.length2();
    const btScalar s = btScalar(2.0) / lenSq;

    const btScalar xs = q.x() * s;
    const btScalar ys = q.y() * s;
    const btScalar zs = q.z() * s;

    const btScalar wx = q.w() * xs;
    const btScalar wy = q.w() * ys;
    const btScalar wz = q.w() * zs;

    const btScalar xx = q.x() * xs;
    const btScalar xy = q.x() * ys;
    const btScalar xz = q.x() * zs;

    const btScalar yy = q.y() * ys;
    const btScalar yz = q.y() * zs;
    const btScalar zz = q.z() * zs;

    m.setValue(
        btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
        xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
        xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
    return m;
}
// Returns 1 if, for any row, the summed absolute x/y/z differences exceed
// 4 * FLT_EPSILON; the fourth lane is not compared. Returns 0 otherwise.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    int row = 0;
    while (row < 3)
    {
        const btVector3 ra = a.getRow(row);
        const btVector3 rb = b.getRow(row);
        if( fabs(ra.m_floats[0] - rb.m_floats[0]) +
            fabs(ra.m_floats[1] - rb.m_floats[1]) +
            fabs(ra.m_floats[2] - rb.m_floats[2]) > FLT_EPSILON * 4)
        {
            return 1;
        }
        ++row;
    }
    return 0;
}
// Compares btMatrix3x3::setRotation() with the scalar reference
// M3x3setRot_ref() on ARRAY_SIZE random quaternions, then times both
// implementations over LOOPCOUNT passes and logs the result (best pass, or
// the average when gReportAverageTimes is non-zero).
// Returns 0 on success and -1 on the first mismatching element.
int Test_3x3setRot(void)
{
    // Inputs and outputs are plain stack arrays.
    btMatrix3x3 in1[ARRAY_SIZE];
    btQuaternion in2[ARRAY_SIZE];
    btMatrix3x3 in3[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];

    // Init the data and check each element as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btQuaternion(qtrand_f4());
        // Both implementations overwrite their matrix argument, so the
        // tested one works on its own copy.
        in3[i] = in1[i];

        out[i] = M3x3setRot_ref(in1[i], in2[i]);
        in3[i].setRotation(in2[i]);
        out2[i] = in3[i];

        if( out[i] != out2[i] )
        {
            vlog( "Error - M3x3setRot result error! ");
            // size_t is not guaranteed to match %ld, so convert explicitly.
            vlog( "failure @ %ld\n", (long)i);
            btVector3 m0, m1, m2;
            m0 = out[i].getRow(0);
            m1 = out[i].getRow(1);
            m2 = out[i].getRow(2);
            vlog( "\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\n (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\n (%10.7f, %10.7f, %10.7f, %10.7f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            m0 = out2[i].getRow(0);
            m1 = out2[i].getRow(1);
            m2 = out2[i].getRow(2);
            vlog( "\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\n (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\n (%10.7f, %10.7f, %10.7f, %10.7f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // -1 converts to the largest uint64_t, so the first sample replaces it.
    bestTime = -1LL;
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3setRot_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
        {
            in3[i].setRotation(in2[i]);
            out2[i] = in3[i];
        }
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3setRot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3setRot_h
#define BulletTest_Test_3x3setRot_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the 3x3 setRotation test. Returns 0 on success, non-zero on failure.
int Test_3x3setRot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,117 @@
//
// Test_3x3timesTranspose.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3timesTranspose.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
static inline btSimdFloat4 rand_f4(void)
{
    // Three RANDF samples followed by a NaN in the w channel.
    const btSimdFloat4 sample = btAssign128( RANDF, RANDF, RANDF, BT_NAN );
    return sample;
}
// Scalar reference for in.timesTranspose(m): element (r, c) is the dot
// product of row r of 'in' with row c of 'm'.
static btMatrix3x3 timesTranspose( const btMatrix3x3 &in, const btMatrix3x3 &m )
{
    btVector3 a0 = in[0];
    btVector3 a1 = in[1];
    btVector3 a2 = in[2];

    btMatrix3x3 result(
        a0.dot(m[0]), a0.dot(m[1]), a0.dot(m[2]),
        a1.dot(m[0]), a1.dot(m[1]), a1.dot(m[2]),
        a2.dot(m[0]), a2.dot(m[1]), a2.dot(m[2]));
    return result;
}
// Returns 1 if any of the three row pairs compares unequal, otherwise 0.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    for (int row = 0; row < 3; ++row)
    {
        if( a.getRow(row) != b.getRow(row) )
            return 1;
    }
    return 0;
}
// Compares btMatrix3x3::timesTranspose() with the scalar reference
// timesTranspose() on ARRAY_SIZE random matrix pairs, then times both
// implementations over LOOPCOUNT passes and logs the result (best pass, or
// the average when gReportAverageTimes is non-zero).
// Returns 0 on success and -1 on the first mismatching element.
int Test_3x3timesTranspose(void)
{
    // Inputs and outputs are plain stack arrays.
    btMatrix3x3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];

    // Init the data and check each element as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = timesTranspose(in1[i], in2[i]);
        out2[i] = in1[i].timesTranspose(in2[i]);

        if( out[i] != out2[i] )
        {
            // size_t is not guaranteed to match %ld, so convert explicitly.
            printf( "failure @ %ld\n", (long)i);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // -1 converts to the largest uint64_t, so the first sample replaces it.
    bestTime = -1LL;
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = timesTranspose(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        // Tested results go to out2, keeping the reference results in out.
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = in1[i].timesTranspose(in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3timesTranspose.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3timesTranspose_h
#define BulletTest_Test_3x3timesTranspose_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the 3x3 timesTranspose test. Returns 0 on success, non-zero on failure.
int Test_3x3timesTranspose(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,116 @@
//
// Test_3x3transpose.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3transpose.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 1024
static inline btSimdFloat4 rand_f4(void)
{
    // x, y, z come from RANDF; the w channel is NaN.
    const btSimdFloat4 lanes = btAssign128( RANDF, RANDF, RANDF, BT_NAN );
    return lanes;
}
// Scalar reference transpose: gathers the x, y and z components of the
// three rows of 'in' into the rows of the result, with 0 in the fourth lane.
static btMatrix3x3 Transpose( btMatrix3x3 &in )
{
    btVector3 r0 = in.getRow(0);
    btVector3 r1 = in.getRow(1);
    btVector3 r2 = in.getRow(2);

    btVector3 xs = btAssign128(r0.x(), r1.x(), r2.x(), 0 );
    btVector3 ys = btAssign128(r0.y(), r1.y(), r2.y(), 0 );
    btVector3 zs = btAssign128(r0.z(), r1.z(), r2.z(), 0);

    return btMatrix3x3( xs, ys, zs);
}
// Returns 1 when any row of 'a' compares unequal to the same row of 'b',
// otherwise 0. Rows are compared in order and evaluation stops at the
// first mismatch.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    const bool anyRowDiffers =
        ( a.getRow(0) != b.getRow(0) ) ||
        ( a.getRow(1) != b.getRow(1) ) ||
        ( a.getRow(2) != b.getRow(2) );
    return anyRowDiffers ? 1 : 0;
}
// Compares btMatrix3x3::transpose() with the scalar reference Transpose()
// on ARRAY_SIZE random matrices, then times both implementations over
// LOOPCOUNT passes and logs the result (best pass, or the average when
// gReportAverageTimes is non-zero).
// Returns 0 on success and -1 on the first mismatching element.
int Test_3x3transpose(void)
{
    // Inputs and outputs are plain stack arrays.
    btMatrix3x3 in[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];

    // Init the data and check each element as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = Transpose(in[i]);
        out2[i] = in[i].transpose();

        if( out[i] != out2[i] )
        {
            // size_t is not guaranteed to match %ld, so convert explicitly.
            printf( "failure @ %ld\n", (long)i);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // -1 converts to the largest uint64_t, so the first sample replaces it.
    bestTime = -1LL;
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = Transpose(in[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        // Tested results go to out2, keeping the reference results in out.
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = in[i].transpose();
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3transpose.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3transpose_h
#define BulletTest_Test_3x3transpose_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the 3x3 transpose test. Returns 0 on success, non-zero on failure.
int Test_3x3transpose(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,168 @@
//
// Test_3x3transposeTimes.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3transposeTimes.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
static inline btSimdFloat4 rand_f4(void)
{
    // RANDF_01 in the first three channels, NaN in w.
    const btSimdFloat4 value = btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN );
    return value;
}
// Scalar reference for btMatrix3x3::transposeTimes(): returns transpose(in) * m.
// Element (r, c) of the result is in[0][r]*m[0][c] + in[1][r]*m[1][c] + in[2][r]*m[2][c].
// The w lane of every result row is set to 0.
static btMatrix3x3 TransposeTimesReference( const btMatrix3x3 &in, const btMatrix3x3 &m )
{
    btScalar cell[3][3];

    for( int r = 0; r < 3; ++r )
    {
        for( int c = 0; c < 3; ++c )
        {
            // Same left-to-right summation order as the hand-unrolled form
            cell[r][c] = in[0].m_floats[r] * m[0].m_floats[c]
                       + in[1].m_floats[r] * m[1].m_floats[c]
                       + in[2].m_floats[r] * m[2].m_floats[c];
        }
    }

    const btSimdFloat4 row0 = btAssign128( cell[0][0], cell[0][1], cell[0][2], 0.0f );
    const btSimdFloat4 row1 = btAssign128( cell[1][0], cell[1][1], cell[1][2], 0.0f );
    const btSimdFloat4 row2 = btAssign128( cell[2][0], cell[2][1], cell[2][2], 0.0f );

    return btMatrix3x3( row0, row1, row2 );
}
// Row-wise inequality for two matrices: returns 1 as soon as any of the three
// rows compares unequal, 0 when all three rows compare equal.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    for( int row = 0; row < 3; ++row )
    {
        if( a.getRow(row) != b.getRow(row) )
            return 1;
    }
    return 0;
}
// Checks btMatrix3x3::transposeTimes() against TransposeTimesReference() on
// ARRAY_SIZE random matrix pairs, then times both implementations.
//
// Returns 0 on success, or -1 if any element of a result differs from the
// reference by more than 1e-6 (relative error). Smaller differences are
// tolerated and only reported as a warning.
int Test_3x3transposeTimes(void)
{
    // Working arrays live on the stack (no guard pages are involved here)
    btMatrix3x3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];
    float maxRelativeError = 0.f;

    // Init the data and verify each pair as it is generated
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = TransposeTimesReference(in1[i], in2[i]);
        out2[i] = in1[i].transposeTimes(in2[i]);

        if( out[i] != out2[i] )
        {
            float relativeError = 0.f;

            for (int column=0;column<3;column++)
            {
                for (int row=0;row<3;row++)
                {
                    const float expected  = out[i][row][column];
                    const float absError  = btFabs(out2[i][row][column] - expected);
                    const float magnitude = btFabs(expected);
                    // Divide by the magnitude so that a negative reference value
                    // cannot yield a negative (and therefore ignored) error; fall
                    // back to the absolute error when the reference is exactly 0.
                    const float cellError = (magnitude > 0.f) ? absError / magnitude : absError;
                    relativeError = btMax(relativeError, cellError);
                }
            }

            if (relativeError>1e-6)
            {
                vlog( "failure @ %ld\n", (long) i);
                btVector3 m0, m1, m2;
                m0 = out[i].getRow(0);
                m1 = out[i].getRow(1);
                m2 = out[i].getRow(2);

                vlog(  "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                       "\n          (%10.4f, %10.4f, %10.4f, %10.4f) "
                       "\n          (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                       m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                       m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                       m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);

                m0 = out2[i].getRow(0);
                m1 = out2[i].getRow(1);
                m2 = out2[i].getRow(2);

                vlog(  "\ntested  = (%10.4f, %10.4f, %10.4f, %10.4f) "
                       "\n          (%10.4f, %10.4f, %10.4f, %10.4f) "
                       "\n          (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                       m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                       m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                       m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);

                return -1;
            }

            // Within tolerance: remember the worst case for the warning below
            if (relativeError>maxRelativeError)
                maxRelativeError = relativeError;
        }
    }

    if (maxRelativeError)
    {
        printf("Warning: maxRelativeError = %e\n",maxRelativeError);
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the scalar reference: keep both the sum and the best sample
    bestTime = -1LL;    // wraps to the largest uint64_t so any sample beats it
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++) {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = TransposeTimesReference(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the implementation under test the same way
    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++) {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = in1[i].transposeTimes(in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    // Each sample covers ARRAY_SIZE operations, so report per-operation cost
    vlog( "Timing:\n" );
    vlog( "\t    scalar\t    vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );

    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3transposeTimes.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3transposeTimes_h
#define BulletTest_Test_3x3transposeTimes_h
#ifdef __cplusplus
extern "C" {
#endif
// Unit test entry point for btMatrix3x3::transposeTimes(), exported with C linkage.
// Returns 0 on success and a non-zero value (-1) on failure.
int Test_3x3transposeTimes(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,495 @@
//
// Test_btDbvt.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc., Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_btDbvt.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <BulletCollision/BroadphaseCollision/btDbvt.h>
// reference code for testing purposes
// Scalar reference for the AABB overlap test: two boxes intersect when, on each
// of the x, y and z axes, a's minimum is <= b's maximum and a's maximum is >= b's
// minimum. The fourth float of each corner is not examined.
SIMD_FORCE_INLINE bool Intersect_ref( btDbvtAabbMm& a, btDbvtAabbMm& b)
{
    const btVector3 &aLo = a.tMins();
    const btVector3 &aHi = a.tMaxs();
    const btVector3 &bLo = b.tMins();
    const btVector3 &bHi = b.tMaxs();

    for( int axis = 0; axis < 3; ++axis )
    {
        // Written as negated comparisons so the outcome matches the original
        // "<=" / ">=" chain for every input, including NaN.
        if( !(aLo.m_floats[axis] <= bHi.m_floats[axis]) )
            return false;
        if( !(aHi.m_floats[axis] >= bLo.m_floats[axis]) )
            return false;
    }
    return true;
}
// Scalar reference proximity metric between two boxes: the sum of the absolute
// x, y and z differences between (min + max) of a and (min + max) of b.
SIMD_FORCE_INLINE btScalar Proximity_ref( btDbvtAabbMm& a,
                                          btDbvtAabbMm& b)
{
    const btVector3 cornerSumA = a.tMins() + a.tMaxs();
    const btVector3 cornerSumB = b.tMins() + b.tMaxs();
    const btVector3 delta = cornerSumA - cornerSumB;

    return btFabs(delta.x()) + btFabs(delta.y()) + btFabs(delta.z());
}
// Scalar reference selection: returns 0 when box a is strictly closer to o than
// box b is (per Proximity_ref), otherwise 1.
SIMD_FORCE_INLINE int Select_ref( btDbvtAabbMm& o,
                                  btDbvtAabbMm& a,
                                  btDbvtAabbMm& b)
{
    if( Proximity_ref(o,a) < Proximity_ref(o,b) )
        return 0;
    return 1;
}
// Scalar reference merge: writes into r the per-axis minimum of the two min
// corners and the per-axis maximum of the two max corners.
SIMD_FORCE_INLINE void Merge_ref( btDbvtAabbMm& a,
                                  btDbvtAabbMm& b,
                                  btDbvtAabbMm& r)
{
    // Only the first three components are merged. Extending this to four (to
    // mirror vector code that writes all four floats) was considered and rejected:
    // the 4th component is ignored and not computed by non-vector code paths.
    for( int axis = 0; axis < 3; ++axis )
    {
        const btScalar aLo = a.tMins().m_floats[axis];
        const btScalar bLo = b.tMins().m_floats[axis];
        r.tMins().m_floats[axis] = (aLo < bLo) ? aLo : bLo;

        const btScalar aHi = a.tMaxs().m_floats[axis];
        const btScalar bHi = b.tMaxs().m_floats[axis];
        r.tMaxs().m_floats[axis] = (aHi > bHi) ? aHi : bHi;
    }
}
/*
[0] float32_t 0.0318338
[1] float32_t 0.0309355
[2] float32_t 0.93264
[3] float32_t 0.88788
[0] float32_t 0.59133
[1] float32_t 0.478779
[2] float32_t 0.833354
[3] float32_t 0.186335
[0] float32_t 0.242578
[1] float32_t 0.0134696
[2] float32_t 0.383139
[3] float32_t 0.414653
[0] float32_t 0.067769
[1] float32_t 0.993127
[2] float32_t 0.484308
[3] float32_t 0.765338
*/
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
#define DATA_SIZE 1024
int Test_btDbvt(void)
{
btDbvtAabbMm a[DATA_SIZE], b[DATA_SIZE], c[DATA_SIZE];
btDbvtAabbMm a_ref[DATA_SIZE], b_ref[DATA_SIZE], c_ref[DATA_SIZE];
int i;
bool Intersect_Test_Res[DATA_SIZE], Intersect_Ref_Res[DATA_SIZE];
int Select_Test_Res[DATA_SIZE], Select_Ref_Res[DATA_SIZE];
for (i = 0; i < DATA_SIZE; i++)
{
a[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
a_ref[i].tMins().m_floats[0] = a[i].tMins().m_floats[0];
a_ref[i].tMins().m_floats[1] = a[i].tMins().m_floats[1];
a_ref[i].tMins().m_floats[2] = a[i].tMins().m_floats[2];
a_ref[i].tMins().m_floats[3] = a[i].tMins().m_floats[3];
a_ref[i].tMaxs().m_floats[0] = a[i].tMaxs().m_floats[0];
a_ref[i].tMaxs().m_floats[1] = a[i].tMaxs().m_floats[1];
a_ref[i].tMaxs().m_floats[2] = a[i].tMaxs().m_floats[2];
a_ref[i].tMaxs().m_floats[3] = a[i].tMaxs().m_floats[3];
b_ref[i].tMins().m_floats[0] = b[i].tMins().m_floats[0];
b_ref[i].tMins().m_floats[1] = b[i].tMins().m_floats[1];
b_ref[i].tMins().m_floats[2] = b[i].tMins().m_floats[2];
b_ref[i].tMins().m_floats[3] = b[i].tMins().m_floats[3];
b_ref[i].tMaxs().m_floats[0] = b[i].tMaxs().m_floats[0];
b_ref[i].tMaxs().m_floats[1] = b[i].tMaxs().m_floats[1];
b_ref[i].tMaxs().m_floats[2] = b[i].tMaxs().m_floats[2];
b_ref[i].tMaxs().m_floats[3] = b[i].tMaxs().m_floats[3];
c_ref[i].tMins().m_floats[0] = c[i].tMins().m_floats[0];
c_ref[i].tMins().m_floats[1] = c[i].tMins().m_floats[1];
c_ref[i].tMins().m_floats[2] = c[i].tMins().m_floats[2];
c_ref[i].tMins().m_floats[3] = c[i].tMins().m_floats[3];
c_ref[i].tMaxs().m_floats[0] = c[i].tMaxs().m_floats[0];
c_ref[i].tMaxs().m_floats[1] = c[i].tMaxs().m_floats[1];
c_ref[i].tMaxs().m_floats[2] = c[i].tMaxs().m_floats[2];
c_ref[i].tMaxs().m_floats[3] = c[i].tMaxs().m_floats[3];
}
#if 1
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Test_Res[i] = Intersect(a[i], b[i]);
Intersect_Ref_Res[i] = Intersect_ref(a_ref[i], b_ref[i]);
if(Intersect_Test_Res[i] != Intersect_Ref_Res[i])
{
printf("Diff on %d\n", i);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
}
}
#endif
uint64_t scalarTime;
uint64_t vectorTime;
size_t j;
////////////////////////////////////
//
// Time and Test Intersect
//
////////////////////////////////////
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Ref_Res[i] = Intersect_ref(a_ref[i], b_ref[i]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Test_Res[i] = Intersect(a[i], b[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Intersect Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
for (i = 0; i < DATA_SIZE; i++)
{
if(Intersect_Test_Res[i] != Intersect_Ref_Res[i])
{
printf("Intersect fail at %d\n", i);
return 1;
}
}
////////////////////////////////////
//
// Time and Test Merge
//
////////////////////////////////////
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Merge_ref(a_ref[i], b_ref[i], c_ref[i]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Merge(a[i], b[i], c[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Merge Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
/*
c [0] float32_t 0.00455523
[1] float32_t 0.559712
[2] float32_t 0.0795838
[3] float32_t 0.10182
c_ref
[0] float32_t 0.00455523
[1] float32_t 0.559712
[2] float32_t 0.0795838
[3] float32_t 0.552081
c [0] float32_t 0.829904
[1] float32_t 0.692891
[2] float32_t 0.961654
[3] float32_t 0.666956
c_ref
[0] float32_t 0.829904
[1] float32_t 0.692891
[2] float32_t 0.961654
[3] float32_t 0.522878
*/
for (i = 0; i < DATA_SIZE; i++)
{
//ignore 4th component because it is not computed in all code-paths
if( (fabs(c[i].tMaxs().m_floats[0] - c_ref[i].tMaxs().m_floats[0]) > 0.001) ||
(fabs(c[i].tMaxs().m_floats[1] - c_ref[i].tMaxs().m_floats[1]) > 0.001) ||
(fabs(c[i].tMaxs().m_floats[2] - c_ref[i].tMaxs().m_floats[2]) > 0.001) ||
// (fabs(c[i].tMaxs().m_floats[3] - c_ref[i].tMaxs().m_floats[3]) > 0.001) ||
(fabs(c[i].tMins().m_floats[0] - c_ref[i].tMins().m_floats[0]) > 0.001) ||
(fabs(c[i].tMins().m_floats[1] - c_ref[i].tMins().m_floats[1]) > 0.001) ||
(fabs(c[i].tMins().m_floats[2] - c_ref[i].tMins().m_floats[2]) > 0.001)
//|| (fabs(c[i].tMins().m_floats[3] - c_ref[i].tMins().m_floats[3]) > 0.001)
)
//if((c[i].tMaxs().m_floats[0] != c_ref[i].tMaxs().m_floats[0]) || (c[i].tMaxs().m_floats[1] != c_ref[i].tMaxs().m_floats[1]) || (c[i].tMaxs().m_floats[2] != c_ref[i].tMaxs().m_floats[2]) || (c[i].tMaxs().m_floats[3] != c_ref[i].tMaxs().m_floats[3]) || (c[i].tMins().m_floats[0] != c_ref[i].tMins().m_floats[0]) || (c[i].tMins().m_floats[1] != c_ref[i].tMins().m_floats[1]) || (c[i].tMins().m_floats[2] != c_ref[i].tMins().m_floats[2]) || (c[i].tMins().m_floats[3] != c_ref[i].tMins().m_floats[3]))
{
printf("Merge fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
return 1;
}
}
////////////////////////////////////
//
// Time and Test Select
//
////////////////////////////////////
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Select_Ref_Res[i] = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Select_Test_Res[i] = Select(a[i], b[i], c[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Select Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
for (i = 0; i < DATA_SIZE; i++)
{
Select_Ref_Res[i] = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
Select_Test_Res[i] = Select(a[i], b[i], c[i]);
if(Select_Test_Res[i] != Select_Ref_Res[i])
{
printf("Select fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
return 1;
}
}
return 0;
}
#endif

View File

@@ -0,0 +1,21 @@
//
// Test_btDbvt.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc., Inc.
//
#ifndef BulletTest_Test_btDbvt_h
#define BulletTest_Test_btDbvt_h
#ifdef __cplusplus
extern "C" {
#endif
// Unit test entry point for the btDbvt Intersect/Merge/Select helpers, exported
// with C linkage. Returns 0 on success and a non-zero value on failure.
int Test_btDbvt(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,153 @@
//
//  Test_dot3.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_dot3.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static btVector3 dot3_ref( const btVector3 &, const btVector3 &, const btVector3 &, const btVector3 &);
// Scalar reference for btVector3::dot3(): returns the vector whose x, y and z
// are the dot products of v with v1, v2 and v3 respectively.
static btVector3 dot3_ref( const btVector3 &v, const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
{
    const btScalar withV1 = v.dot(v1);
    const btScalar withV2 = v.dot(v2);
    const btScalar withV3 = v.dot(v3);

    return btVector3( withV1, withV2, withV3 );
}
/*
SIMD_FORCE_INLINE int operator!=(const btVector3 &s, const btVector3 &v)
{
#ifdef __SSE__
__m128 test = _mm_cmpneq_ps( s.mVec128, v.mVec128 );
return (_mm_movemask_ps( test ) & 7) != 0;
#elif defined __ARM_NEON_H
uint32x4_t test = vandq_u32( vceqq_f32( s.mVec128, v.mVec128 ), (uint32x4_t){-1,-1,-1,0});
uint32x2_t t = vpadd_u32( vget_low_u32(test), vget_high_u32(test));
t = vpadd_u32(t, t);
return -3 != (int32_t) vget_lane_u32(t, 0);
#else
return s.m_floats[0] != v.m_floats[0] ||
s.m_floats[1] != v.m_floats[1] ||
s.m_floats[2] != v.m_floats[2];
#endif
}
*/
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
int Test_dot3(void)
{
btVector3 v, v1, v2, v3;
#define DATA_SIZE 1024
btVector3 vec3_arr[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
btVector3 vec3_arr3[DATA_SIZE];
btVector3 res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
btVector3 correct, test;
for( k = 0; k < DATA_SIZE; k++ )
{
vec3_arr[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
vec3_arr1[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
vec3_arr2[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN ));
vec3_arr3[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
correct = dot3_ref(vec3_arr[k], vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
test = vec3_arr[k].dot3( vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
if( correct != test )
{
vlog( "Error (%ld) - dot3 result error! *{%a, %a, %a, %a} != {%a, %a, %a, %a} \n", k,
correct.x(), correct.y(), correct.z(), correct.w(),
test.x(), test.y(), test.z(), test.w() );
return 1;
}
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t k32 = (k & (DATA_SIZE-1));
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t k32 = (k & (DATA_SIZE-1));
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
//  Test_dot3.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_dot3_h
#define BulletTest_Test_dot3_h
#ifdef __cplusplus
extern "C" {
#endif
// Unit test entry point for btVector3::dot3(), exported with C linkage.
// Returns 0 on success and a non-zero value on failure.
int Test_dot3(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,281 @@
//
// Test_maxdot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_maxdot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
// Forward declaration; the definition follows Test_maxdot() below.
static long maxdot_ref( const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult );
// log2 of the largest vertex count exercised; a smaller value is used on ARM
#ifdef __arm__
#define MAX_LOG2_SIZE 9
#else
#define MAX_LOG2_SIZE 10
#endif
// Number of vertices in the test buffer
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
// Calls made per timing sample in the benchmark loops
#define LOOPCOUNT 10
int Test_maxdot(void)
{
// Init an array flanked by guard pages
btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL );
float *fp = (float*) data;
long correct, test;
btVector3 localScaling( 0.1f, 0.2f, 0.3f);
size_t size;
// Init the data
size_t i;
for( i = 0; i < MAX_SIZE; i++ )
{
fp[4*i] = (int32_t) RANDF_16;
fp[4*i+1] = (int32_t) RANDF_16;
fp[4*i+2] = (int32_t) RANDF_16;
fp[4*i+3] = BT_NAN; // w channel NaN
}
float correctDot, testDot;
fp = (float*) localScaling;
float maxRelativeError = 0.f;
for( size = 1; size <= MAX_SIZE; size++ )
{
float *in = (float*)(data + MAX_SIZE - size);
size_t position;
for( position = 0; position < size; position++ )
{
float *biggest = in + position * 4;
float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] };
biggest[0] += LARGE_FLOAT17;
biggest[1] += LARGE_FLOAT17;
biggest[2] += LARGE_FLOAT17;
biggest[3] += LARGE_FLOAT17;
correctDot = BT_NAN;
testDot = BT_NAN;
correct = maxdot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
test = localScaling.maxDot( (btVector3*) in, size, testDot);
if( test < 0 || test >= size )
{
vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
continue;
}
if( correct != test )
{
vlog( "Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
return 1;
}
if( test != position )
{
vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2],
fp[0] * in[4*position] + fp[1] * in[4*position+1] + fp[2] * in[4*position+2] );
return 1;
}
if( correctDot != testDot )
{
float relativeError = btFabs((testDot - correctDot) / correctDot);
if (relativeError>1e-6)
{
vlog( "Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
return 1;
} else
{
if (maxRelativeError < relativeError)
{
maxRelativeError = relativeError;
#ifdef VERBOSE_WARNING
sprintf(errStr,"Warning @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2]);
#endif //VERBOSE_WARNING
}
}
}
memcpy( biggest, old, 16 );
}
}
if (maxRelativeError)
{
printf("Warning: relative error = %e\n", maxRelativeError);
#ifdef VERBOSE_WARNING
vlog(errStr);
#endif
}
uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)];
uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)];
size_t j, k;
float *in = (float*) data;
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += maxdot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
scalarTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTimes[size] = bestTime;
else
scalarTimes[size] /= 100;
}
uint64_t *timep = &scalarTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += maxdot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.maxDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
vectorTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTimes[size] = bestTime;
else
vectorTimes[size] /= 100;
}
timep = &vectorTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.maxDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
vlog( "Timing:\n" );
vlog( " size\t scalar\t vector\n" );
for( size = 1; size <= 32; size++ )
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT );
size_t index = 33;
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT );
index++;
}
// Useless check to make sure that the timing loops are not optimized away
if( test != correct )
vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test);
GuardFree(data);
return 0;
}
// Scalar reference for btVector3::maxDot.
// vertices  : array of 4-float vertices (only the first three lanes are read)
// vec       : the three components to dot every vertex against
// count     : number of vertices to scan
// dotResult : receives the largest dot product found (-BT_INFINITY if none)
// Returns the index of the first vertex with the largest dot product, or -1
// when no vertex compared greater than -BT_INFINITY.
static long maxdot_ref( const btSimdFloat4 *vertices,
                        float *vec,
                        size_t count,
                        float *dotResult )
{
    const float *vertex = (const float*) vertices;
    float best = -BT_INFINITY;
    long bestIndex = -1;
    long n = 0;
    while( n < count )
    {
        const float candidate = vec[0] * vertex[0] + vec[1] * vertex[1] + vec[2] * vertex[2];
        // Strict comparison: on ties the earliest index is kept
        if( candidate > best )
        {
            best = candidate;
            bestIndex = n;
        }
        vertex += 4;    // step over x, y, z and the unused fourth lane
        n++;
    }
    *dotResult = best;
    return bestIndex;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_maxdot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_maxdot_h
#define BulletTest_Test_maxdot_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the maxdot unit test, declared with C linkage.
// Follows the test-list convention: a non-zero return value means failure.
int Test_maxdot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,269 @@
//
// Test_mindot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_mindot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
// Scalar implementation that btVector3::minDot is checked and timed against;
// defined at the bottom of this file.
static long mindot_ref( const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult );
// Largest problem size is 2^MAX_LOG2_SIZE vertices.
// NOTE(review): both branches currently select the same value (9).
#ifdef __arm__
#define MAX_LOG2_SIZE 9
#else
#define MAX_LOG2_SIZE 9
#endif
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
// Number of back-to-back calls inside one timed measurement
#define LOOPCOUNT 100
int Test_mindot(void)
{
// Init an array flanked by guard pages
btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL );
float *fp = (float*) data;
long correct, test;
btVector3 localScaling( 0.1f, 0.2f, 0.3f);
size_t size;
// Init the data
size_t i;
for( i = 0; i < MAX_SIZE; i++ )
{
fp[4*i] = (int32_t) RANDF_16;
fp[4*i+1] = (int32_t) RANDF_16;
fp[4*i+2] = (int32_t) RANDF_16;
fp[4*i+3] = BT_NAN; // w channel NaN
}
float correctDot, testDot;
fp = (float*) localScaling;
float maxRelativeError = 0.f;
for( size = 1; size <= MAX_SIZE; size++ )
{
float *in = (float*)(data + MAX_SIZE - size);
size_t position;
for( position = 0; position < size; position++ )
{
float *biggest = in + position * 4;
float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] };
biggest[0] -= LARGE_FLOAT17;
biggest[1] -= LARGE_FLOAT17;
biggest[2] -= LARGE_FLOAT17;
biggest[3] -= LARGE_FLOAT17;
correctDot = BT_NAN;
testDot = BT_NAN;
correct = mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
test = localScaling.minDot( (btVector3*) in, size, testDot);
if( test < 0 || test >= size )
{
vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
continue;
}
if( correct != test )
{
vlog( "Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
return 1;
}
if( test != position )
{
vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2],
fp[0] * in[4*position] + fp[1] * in[4*position+1] + fp[2] * in[4*position+2] );
return 1;
}
if( correctDot != testDot )
{
float relativeError = btFabs((testDot - correctDot) / correctDot);
if (relativeError>1e6)
{
vlog( "Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
return 1;
} else
{
if (maxRelativeError < relativeError)
{
maxRelativeError = relativeError;
}
}
}
memcpy( biggest, old, 16 );
}
}
if (maxRelativeError)
{
printf("Warning: relative error = %e\n", maxRelativeError);
}
uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)];
uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)];
size_t j, k;
float *in = (float*) data;
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
scalarTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTimes[size] = bestTime;
else
scalarTimes[size] /= 100;
}
uint64_t *timep = &scalarTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.minDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
vectorTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTimes[size] = bestTime;
else
vectorTimes[size] /= 100;
}
timep = &vectorTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.minDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
vlog( "Timing:\n" );
vlog( " size\t scalar\t vector\n" );
for( size = 1; size <= 32; size++ )
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT );
size_t index = 33;
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT );
index++;
}
// Useless check to make sure that the timing loops are not optimized away
if( test != correct )
vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test);
GuardFree(data);
return 0;
}
// Scalar reference for btVector3::minDot.
// vertices  : array of 4-float vertices (only the first three lanes are read)
// vec       : the three components to dot every vertex against
// count     : number of vertices to scan
// dotResult : receives the smallest dot product found (BT_INFINITY if none)
// Returns the index of the first vertex with the smallest dot product, or -1
// when no vertex compared less than BT_INFINITY.
static long mindot_ref( const btSimdFloat4 *vertices,
                        float *vec,
                        size_t count,
                        float *dotResult )
{
    const float *vertex = (const float*) vertices;
    float lowest = BT_INFINITY;
    long lowestIndex = -1;
    long n = 0;
    while( n < count )
    {
        const float candidate = vec[0] * vertex[0] + vec[1] * vertex[1] + vec[2] * vertex[2];
        // Strict comparison: on ties the earliest index is kept
        if( candidate < lowest )
        {
            lowest = candidate;
            lowestIndex = n;
        }
        vertex += 4;    // step over x, y, z and the unused fourth lane
        n++;
    }
    *dotResult = lowest;
    return lowestIndex;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_mindot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_mindot_h
#define BulletTest_Test_mindot_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the mindot unit test, declared with C linkage.
// Follows the test-list convention: a non-zero return value means failure.
int Test_mindot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,162 @@
//
// Test_qtdot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_qtdot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btQuaternion.h>
// Operation under test: btQuaternion::dot
#define BT_OP(a, b) (a.dot(b))
// reference code for testing purposes
static inline btScalar qtdot_ref(btQuaternion& q1, btQuaternion& q2);
// Four-component dot product of q1 and q2, summed in x, y, z, w order.
static inline btScalar qtdot_ref(btQuaternion& q1, btQuaternion& q2)
{
    const btScalar sum =
        q1.x() * q2.x() +
        q1.y() * q2.y() +
        q1.z() * q2.z() +
        q1.w() * q2.w();
    return sum;
}
// Quaternion pairs processed per timed measurement
#define LOOPCOUNT 1024
// Number of timed measurements
#define NUM_CYCLES 1000
int Test_qtdot(void)
{
btQuaternion q1, q2;
float x, y, z, w, vNaN;
vNaN = BT_NAN; // w channel NaN
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q1.setValue(x,y,z,w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q2.setValue(x,y,z,w);
btScalar correct_res, test_res;
{
correct_res = vNaN;
test_res = vNaN;
correct_res = qtdot_ref(q1, q2);
test_res = BT_OP(q1,q2);
if( fabsf(correct_res - test_res) > FLT_EPSILON*4 )
{
vlog( "Error - qtdot result error! "
"\ncorrect = %10.4f "
"\ntested = %10.4f \n",
correct_res, test_res);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arr1[DATA_SIZE];
btQuaternion qt_arr2[DATA_SIZE];
btScalar res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr2[k].setValue(x,y,z,w);
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t km = (k & (DATA_SIZE-1));
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t km = (k & (DATA_SIZE-1));
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_qtdot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_qtdot_h
#define BulletTest_Test_qtdot_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the quaternion dot-product unit test, declared with C linkage.
// Follows the test-list convention: a non-zero return value means failure.
int Test_qtdot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,183 @@
//
// Test_qtmul.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_qtmul.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btQuaternion.h>
// Operation under test: in-place quaternion product, a *= b
#define BT_OP(a, b) ((a) *= (b))
// reference code for testing purposes
static inline btQuaternion& qtmul_ref(btQuaternion& q1, btQuaternion& q2);
// Scalar quaternion product q1 * q2. The result is written back into q1,
// which is also returned by reference.
static inline btQuaternion& qtmul_ref(btQuaternion& q1, btQuaternion& q2)
{
    // All four components are computed from the old q1 before it is overwritten
    const float px = q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y();
    const float py = q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z();
    const float pz = q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x();
    const float pw = q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z();
    q1.setValue(px, py, pz, pw);
    return q1;
}
// Quaternion pairs processed per timed measurement
#define LOOPCOUNT 1024
// Number of timed measurements
#define NUM_CYCLES 1000
// Unit test and micro-benchmark for btQuaternion::operator*= (BT_OP) against
// the scalar reference qtmul_ref().
// Returns 0 on success, 1 when the summed absolute component error exceeds
// FLT_EPSILON*10.
int Test_qtmul(void)
{
    btQuaternion q1, q2, q3;
    float x, y, z, w;
    const float vNaN = BT_NAN;

    // Init the data; the draw order x, y, z, w is kept so the random sequence is unchanged
    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    w = RANDF_01;
    q1.setValue(x,y,z,w);

    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    w = RANDF_01;
    q2.setValue(x,y,z,w);

    // Both implementations modify their left operand, so each gets its own copy
    q3 = q1;

    btQuaternion correct_res, test_res;
    correct_res.setValue(vNaN, vNaN, vNaN, vNaN);
    test_res.setValue(vNaN, vNaN, vNaN, vNaN);
    correct_res = qtmul_ref(q1, q2);
    test_res = BT_OP(q3,q2);

    const float error =
        fabsf(correct_res.x() - test_res.x()) +
        fabsf(correct_res.y() - test_res.y()) +
        fabsf(correct_res.z() - test_res.z()) +
        fabsf(correct_res.w() - test_res.w());
    if( error > FLT_EPSILON*10 )
    {
        vlog( "Error - qtmul result error! "
              "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
              "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
              correct_res.x(), correct_res.y(),
              correct_res.z(), correct_res.w(),
              test_res.x(), test_res.y(),
              test_res.z(), test_res.w());
        return 1;
    }

#define DATA_SIZE LOOPCOUNT
    btQuaternion qt_arr1[DATA_SIZE];
    btQuaternion qt_arr2[DATA_SIZE];

    // Scalar reference; inputs are regenerated (untimed) before every measurement
    uint64_t scalarTime = 0;
    {
        uint64_t fastest = -1LL;
        for( size_t cycle = 0; cycle < NUM_CYCLES; cycle++ )
        {
            for( size_t n = 0; n < DATA_SIZE; n++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                w = RANDF_01;
                qt_arr1[n].setValue(x,y,z,w);

                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                w = RANDF_01;
                qt_arr2[n].setValue(x,y,z,w);
            }

            const uint64_t begin = ReadTicks();
            for( size_t n = 0; n < LOOPCOUNT; n++ )
            {
                qt_arr1[n] = qtmul_ref(qt_arr1[n], qt_arr2[n]);
            }
            const uint64_t elapsed = ReadTicks() - begin;

            scalarTime += elapsed;
            if( elapsed < fastest )
                fastest = elapsed;
        }
        // Report either the best measurement or the mean over all of them
        scalarTime = ( 0 == gReportAverageTimes ) ? fastest : scalarTime / NUM_CYCLES;
    }

    // Operation under test, same procedure
    uint64_t vectorTime = 0;
    {
        uint64_t fastest = -1LL;
        for( size_t cycle = 0; cycle < NUM_CYCLES; cycle++ )
        {
            for( size_t n = 0; n < DATA_SIZE; n++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                w = RANDF_01;
                qt_arr1[n].setValue(x,y,z,w);

                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                w = RANDF_01;
                qt_arr2[n].setValue(x,y,z,w);
            }

            const uint64_t begin = ReadTicks();
            for( size_t n = 0; n < LOOPCOUNT; n++ )
            {
                qt_arr1[n] = BT_OP(qt_arr1[n], qt_arr2[n]);
            }
            const uint64_t elapsed = ReadTicks() - begin;

            vectorTime += elapsed;
            if( elapsed < fastest )
                fastest = elapsed;
        }
        vectorTime = ( 0 == gReportAverageTimes ) ? fastest : vectorTime / NUM_CYCLES;
    }

    vlog( "Timing:\n" );
    vlog( " \t scalar\t vector\n" );
    vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
          TicksToCycles( vectorTime ) / LOOPCOUNT );
    return 0;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_qtmul.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_qtmul_h
#define BulletTest_Test_qtmul_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the quaternion * quaternion unit test, declared with C linkage.
// Follows the test-list convention: a non-zero return value means failure.
int Test_qtmul(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,162 @@
//
// Test_qtmulQV3.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_qtmulQV3.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btQuaternion.h>
// Operation under test: quaternion * vector product
#define BT_OP(a, b) ((a) * (b))
// reference code for testing purposes
static inline btQuaternion qtmulQV3_ref(const btQuaternion& q, const btVector3& w);
// Scalar product of quaternion q with vector w, where w is treated as a
// quaternion whose fourth component is zero.
static inline btQuaternion qtmulQV3_ref(const btQuaternion& q, const btVector3& w)
{
    const btQuaternion product(
        q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
        q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
        q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
        -q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
    return product;
}
// Operand pairs processed per timed measurement
#define LOOPCOUNT 1024
// Number of timed measurements
#define NUM_CYCLES 1000
// Random 3-vector packed into four floats: three RANDF_m1p1 draws followed by
// NaN in the fourth slot (presumably so accidental use of w shows up in results).
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN ); // w channel NaN
}
// Random quaternion: four RANDF_m1p1 draws.
static inline btSimdFloat4 qtrand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, RANDF_m1p1 );
}
// All four slots NaN; used to pre-fill result variables before they are computed.
static inline btSimdFloat4 qtNAN_f4(void)
{
return btAssign128( BT_NAN, BT_NAN, BT_NAN, BT_NAN );
}
// Unit test and micro-benchmark for btQuaternion * btVector3 (BT_OP) against
// the scalar reference qtmulQV3_ref().
// Returns 0 on success, 1 when the summed absolute component error exceeds
// FLT_EPSILON*8.
int Test_qtmulQV3(void)
{
    btQuaternion q;
    btVector3 v3;

    // Init the data (quaternion first, then vector, to keep the random sequence)
    q = btQuaternion(qtrand_f4());
    v3 = btVector3(rand_f4());

    // Results start out as NaN and are then overwritten by the two implementations
    btQuaternion correct_res, test_res;
    correct_res = btQuaternion(qtNAN_f4());
    test_res = btQuaternion(qtNAN_f4());

    correct_res = qtmulQV3_ref(q, v3);
    test_res = BT_OP(q, v3);

    const float error =
        fabsf(correct_res.x() - test_res.x()) +
        fabsf(correct_res.y() - test_res.y()) +
        fabsf(correct_res.z() - test_res.z()) +
        fabsf(correct_res.w() - test_res.w());
    if( error > FLT_EPSILON*8 )
    {
        vlog( "Error - qtmulQV3 result error! "
              "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
              "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
              correct_res.x(), correct_res.y(),
              correct_res.z(), correct_res.w(),
              test_res.x(), test_res.y(),
              test_res.z(), test_res.w());
        return 1;
    }

#define DATA_SIZE LOOPCOUNT
    btQuaternion qt_arrR[DATA_SIZE];
    btQuaternion qt_arr[DATA_SIZE];
    btVector3 v3_arr[DATA_SIZE];

    // Scalar reference; inputs are regenerated (untimed) before every measurement
    uint64_t scalarTime = 0;
    {
        uint64_t fastest = -1LL;
        for( size_t cycle = 0; cycle < NUM_CYCLES; cycle++ )
        {
            for( size_t n = 0; n < DATA_SIZE; n++ )
            {
                qt_arr[n] = btQuaternion(qtrand_f4());
                v3_arr[n] = btVector3(rand_f4());
            }

            const uint64_t begin = ReadTicks();
            for( size_t n = 0; n < LOOPCOUNT; n++ )
            {
                qt_arrR[n] = qtmulQV3_ref(qt_arr[n], v3_arr[n]);
            }
            const uint64_t elapsed = ReadTicks() - begin;

            scalarTime += elapsed;
            if( elapsed < fastest )
                fastest = elapsed;
        }
        // Report either the best measurement or the mean over all of them
        scalarTime = ( 0 == gReportAverageTimes ) ? fastest : scalarTime / NUM_CYCLES;
    }

    // Operation under test, same procedure
    uint64_t vectorTime = 0;
    {
        uint64_t fastest = -1LL;
        for( size_t cycle = 0; cycle < NUM_CYCLES; cycle++ )
        {
            for( size_t n = 0; n < DATA_SIZE; n++ )
            {
                qt_arr[n] = btQuaternion(qtrand_f4());
                v3_arr[n] = btVector3(rand_f4());
            }

            const uint64_t begin = ReadTicks();
            for( size_t n = 0; n < LOOPCOUNT; n++ )
            {
                qt_arrR[n] = BT_OP(qt_arr[n], v3_arr[n]);
            }
            const uint64_t elapsed = ReadTicks() - begin;

            vectorTime += elapsed;
            if( elapsed < fastest )
                fastest = elapsed;
        }
        vectorTime = ( 0 == gReportAverageTimes ) ? fastest : vectorTime / NUM_CYCLES;
    }

    vlog( "Timing:\n" );
    vlog( " \t scalar\t vector\n" );
    vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
          TicksToCycles( vectorTime ) / LOOPCOUNT );
    return 0;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_qtmulQV3.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_qtmulQV3_h
#define BulletTest_Test_qtmulQV3_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the quaternion * vector3 unit test, declared with C linkage.
// Follows the test-list convention: a non-zero return value means failure.
int Test_qtmulQV3(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,161 @@
//
// Test_qtmulV3Q.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_qtmulV3Q.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btQuaternion.h>
// Operation under test: vector * quaternion product
#define BT_OP(a, b) ((a) * (b))
// reference code for testing purposes
static inline btQuaternion qtmulV3Q_ref(const btVector3& w, const btQuaternion& q);
// Scalar product of vector w with quaternion q, where w is treated as a
// quaternion whose fourth component is zero.
static inline btQuaternion qtmulV3Q_ref(const btVector3& w, const btQuaternion& q)
{
    const btQuaternion product(
        +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
        +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
        +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
        -w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
    return product;
}
// Operand pairs processed per timed measurement
#define LOOPCOUNT 1024
// Number of timed measurements
#define NUM_CYCLES 1000
// Random 3-vector packed into four floats: three RANDF_m1p1 draws followed by
// NaN in the fourth slot (presumably so accidental use of w shows up in results).
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN ); // w channel NaN
}
// Random quaternion: four RANDF_m1p1 draws.
static inline btSimdFloat4 qtrand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, RANDF_m1p1 );
}
// All four slots NaN; used to pre-fill result variables before they are computed.
static inline btSimdFloat4 qtNAN_f4(void)
{
return btAssign128( BT_NAN, BT_NAN, BT_NAN, BT_NAN );
}
// Unit test and micro-benchmark for btVector3 * btQuaternion (BT_OP) against
// the scalar reference qtmulV3Q_ref().
// Returns 0 on success, 1 when the summed absolute component error exceeds
// FLT_EPSILON*8.
int Test_qtmulV3Q(void)
{
    btQuaternion q;
    btVector3 v3;

    // Init the data (quaternion first, then vector, to keep the random sequence)
    q = btQuaternion(qtrand_f4());
    v3 = btVector3(rand_f4());

    // Results start out as NaN and are then overwritten by the two implementations
    btQuaternion correct_res, test_res;
    correct_res = btQuaternion(qtNAN_f4());
    test_res = btQuaternion(qtNAN_f4());

    correct_res = qtmulV3Q_ref(v3, q);
    test_res = BT_OP(v3, q);

    const float error =
        fabsf(correct_res.x() - test_res.x()) +
        fabsf(correct_res.y() - test_res.y()) +
        fabsf(correct_res.z() - test_res.z()) +
        fabsf(correct_res.w() - test_res.w());
    if( error > FLT_EPSILON*8 )
    {
        vlog( "Error - qtmulV3Q result error! "
              "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
              "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
              correct_res.x(), correct_res.y(),
              correct_res.z(), correct_res.w(),
              test_res.x(), test_res.y(),
              test_res.z(), test_res.w());
        return 1;
    }

#define DATA_SIZE LOOPCOUNT
    btQuaternion qt_arrR[DATA_SIZE];
    btQuaternion qt_arr[DATA_SIZE];
    btVector3 v3_arr[DATA_SIZE];

    // Scalar reference; inputs are regenerated (untimed) before every measurement
    uint64_t scalarTime = 0;
    {
        uint64_t fastest = -1LL;
        for( size_t cycle = 0; cycle < NUM_CYCLES; cycle++ )
        {
            for( size_t n = 0; n < DATA_SIZE; n++ )
            {
                qt_arr[n] = btQuaternion(qtrand_f4());
                v3_arr[n] = btVector3(rand_f4());
            }

            const uint64_t begin = ReadTicks();
            for( size_t n = 0; n < LOOPCOUNT; n++ )
            {
                qt_arrR[n] = qtmulV3Q_ref(v3_arr[n], qt_arr[n]);
            }
            const uint64_t elapsed = ReadTicks() - begin;

            scalarTime += elapsed;
            if( elapsed < fastest )
                fastest = elapsed;
        }
        // Report either the best measurement or the mean over all of them
        scalarTime = ( 0 == gReportAverageTimes ) ? fastest : scalarTime / NUM_CYCLES;
    }

    // Operation under test, same procedure
    uint64_t vectorTime = 0;
    {
        uint64_t fastest = -1LL;
        for( size_t cycle = 0; cycle < NUM_CYCLES; cycle++ )
        {
            for( size_t n = 0; n < DATA_SIZE; n++ )
            {
                qt_arr[n] = btQuaternion(qtrand_f4());
                v3_arr[n] = btVector3(rand_f4());
            }

            const uint64_t begin = ReadTicks();
            for( size_t n = 0; n < LOOPCOUNT; n++ )
            {
                qt_arrR[n] = BT_OP(v3_arr[n], qt_arr[n]);
            }
            const uint64_t elapsed = ReadTicks() - begin;

            vectorTime += elapsed;
            if( elapsed < fastest )
                fastest = elapsed;
        }
        vectorTime = ( 0 == gReportAverageTimes ) ? fastest : vectorTime / NUM_CYCLES;
    }

    vlog( "Timing:\n" );
    vlog( " \t scalar\t vector\n" );
    vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
          TicksToCycles( vectorTime ) / LOOPCOUNT );
    return 0;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_qtmulV3Q.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_qtmulV3Q_h
#define BulletTest_Test_qtmulV3Q_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the vector3 * quaternion unit test, declared with C linkage.
// Follows the test-list convention: a non-zero return value means failure.
int Test_qtmulV3Q(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,176 @@
//
// Test_qtnorm.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_qtnorm.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btQuaternion.h>
// Operation under test: in-place btQuaternion::normalize
#define BT_OP(a) (a.normalize())
// reference code for testing purposes
static inline btQuaternion& qtnorm_ref(btQuaternion& q1);
// Scalar normalisation: scales q1 in place by 1/sqrt(q1 . q1) and returns it
// by reference. No guard against a zero-length input.
static inline btQuaternion& qtnorm_ref(btQuaternion& q1)
{
    float scale =
        q1.x() * q1.x() +
        q1.y() * q1.y() +
        q1.z() * q1.z() +
        q1.w() * q1.w();
    scale = 1.0f / sqrtf(scale);
    q1.setValue(q1.x()*scale, q1.y()*scale, q1.z()*scale, q1.w()*scale);
    return q1;
}
// Quaternions processed per timed measurement (also the number of correctness checks)
#define LOOPCOUNT 1024
// Number of timed measurements
#define NUM_CYCLES 1000
// Unit test and micro-benchmark for btQuaternion::normalize (BT_OP) against
// the scalar reference qtnorm_ref().
// Checks LOOPCOUNT random quaternions; returns 1 as soon as the summed
// absolute component error exceeds FLT_EPSILON*10, otherwise 0.
int Test_qtnorm(void)
{
    btQuaternion q1, q2;
    float x, y, z, w;
    const float vNaN = BT_NAN;
    btQuaternion correct_res, test_res;

    for( int trial = 0; trial < LOOPCOUNT; trial++ )
    {
        // Init the data; the draw order x, y, z, w is kept so the random sequence is unchanged
        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        w = RANDF_01;
        q1.setValue(x,y,z,w);
        // Both implementations normalise in place, so each gets its own copy
        q2 = q1;

        correct_res.setValue(vNaN, vNaN, vNaN, vNaN);
        test_res.setValue(vNaN, vNaN, vNaN, vNaN);
        correct_res = qtnorm_ref(q1);
        test_res = BT_OP(q2);

        const float error =
            fabsf(correct_res.x() - test_res.x()) +
            fabsf(correct_res.y() - test_res.y()) +
            fabsf(correct_res.z() - test_res.z()) +
            fabsf(correct_res.w() - test_res.w());
        if( error > FLT_EPSILON*10 )
        {
            vlog( "Error - qtnorm result error! "
                  "\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) \n",
                  correct_res.x(), correct_res.y(),
                  correct_res.z(), correct_res.w(),
                  test_res.x(), test_res.y(),
                  test_res.z(), test_res.w());
            return 1;
        }
    }

#define DATA_SIZE LOOPCOUNT
    btQuaternion qt_arr0[DATA_SIZE];
    btQuaternion qt_arr1[DATA_SIZE];

    // Scalar reference; inputs are regenerated (untimed) before every measurement
    uint64_t scalarTime = 0;
    {
        uint64_t fastest = -1LL;
        for( size_t cycle = 0; cycle < NUM_CYCLES; cycle++ )
        {
            for( size_t n = 0; n < DATA_SIZE; n++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                w = RANDF_01;
                qt_arr1[n].setValue(x,y,z,w);
            }

            const uint64_t begin = ReadTicks();
            for( size_t base = 0; base+4 <= LOOPCOUNT; base += 4 )
            {
                const size_t km = (base & (DATA_SIZE-1));
                qt_arr0[km] = qtnorm_ref(qt_arr1[km]);
                qt_arr0[km+1] = qtnorm_ref(qt_arr1[km+1]);
                qt_arr0[km+2] = qtnorm_ref(qt_arr1[km+2]);
                qt_arr0[km+3] = qtnorm_ref(qt_arr1[km+3]);
            }
            const uint64_t elapsed = ReadTicks() - begin;

            scalarTime += elapsed;
            if( elapsed < fastest )
                fastest = elapsed;
        }
        // Report either the best measurement or the mean over all of them
        scalarTime = ( 0 == gReportAverageTimes ) ? fastest : scalarTime / NUM_CYCLES;
    }

    // Operation under test, same procedure
    uint64_t vectorTime = 0;
    {
        uint64_t fastest = -1LL;
        for( size_t cycle = 0; cycle < NUM_CYCLES; cycle++ )
        {
            for( size_t n = 0; n < DATA_SIZE; n++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                w = RANDF_01;
                qt_arr1[n].setValue(x,y,z,w);
            }

            const uint64_t begin = ReadTicks();
            for( size_t base = 0; base+4 <= LOOPCOUNT; base += 4 )
            {
                const size_t km = (base & (DATA_SIZE-1));
                qt_arr0[km] = BT_OP(qt_arr1[km]);
                qt_arr0[km+1] = BT_OP(qt_arr1[km+1]);
                qt_arr0[km+2] = BT_OP(qt_arr1[km+2]);
                qt_arr0[km+3] = BT_OP(qt_arr1[km+3]);
            }
            const uint64_t elapsed = ReadTicks() - begin;

            vectorTime += elapsed;
            if( elapsed < fastest )
                fastest = elapsed;
        }
        vectorTime = ( 0 == gReportAverageTimes ) ? fastest : vectorTime / NUM_CYCLES;
    }

    vlog( "Timing:\n" );
    vlog( " \t scalar\t vector\n" );
    vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
          TicksToCycles( vectorTime ) / LOOPCOUNT );
    return 0;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_qtnorm.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_qtnorm_h
#define BulletTest_Test_qtnorm_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the quaternion normalize unit test, declared with C linkage.
// Follows the test-list convention: a non-zero return value means failure.
int Test_qtnorm(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,599 @@
//
// Test_quat_aos_neon.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc., Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_quat_aos_neon.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <vectormath/vmInclude.h>
//typedef Vectormath::Aos::Vector3 vmVector3;
//typedef Vectormath::Aos::Quat vmQuat;
//typedef Vectormath::Aos::Matrix3 vmMatrix3;
//typedef Vectormath::Aos::Transform3 vmTransform3;
//typedef Vectormath::Aos::Point3 vmPoint3;
typedef Vectormath::Aos::Vector4 vmVector4;
// reference code for testing purposes
// Plain scalar quaternion used as the reference implementation in this test
// file. It stores its four components as individual floats (mX, mY, mZ, mW)
// and is declared through the ATTRIBUTE_ALIGNED16 macro.
ATTRIBUTE_ALIGNED16(class) Quat_ref
{
// Components, declared consecutively in x, y, z, w order; the by-index
// accessors step from mX with pointer arithmetic and depend on this order.
float mX;
float mY;
float mZ;
float mW;
public:
// Default constructor; does no initialization
//
inline Quat_ref( ) { };
// Copy a quaternion
//
inline Quat_ref( const Quat_ref & quat );
// Construct a quaternion from x, y, z, and w elements
//
inline Quat_ref( float x, float y, float z, float w );
// Construct a quaternion from a 3-D vector and a scalar
//
inline Quat_ref( const vmVector3 & xyz, float w );
// Copy elements from a 4-D vector into a quaternion
//
explicit inline Quat_ref( const vmVector4 & vec );
// Convert a rotation matrix to a unit-length quaternion
//
explicit inline Quat_ref( const vmMatrix3 & rotMat );
// Set all elements of a quaternion to the same scalar value
//
explicit inline Quat_ref( float scalar );
// Assign one quaternion to another
//
inline Quat_ref & operator =( const Quat_ref & quat );
// Set the x, y, and z elements of a quaternion
// NOTE:
// This function does not change the w element.
//
inline Quat_ref & setXYZ( const vmVector3 & vec );
// Get the x, y, and z elements of a quaternion
//
inline const vmVector3 getXYZ( ) const;
// Set the x element of a quaternion
//
inline Quat_ref & setX( float x );
// Set the y element of a quaternion
//
inline Quat_ref & setY( float y );
// Set the z element of a quaternion
//
inline Quat_ref & setZ( float z );
// Set the w element of a quaternion
//
inline Quat_ref & setW( float w );
// Get the x element of a quaternion
//
inline float getX( ) const;
// Get the y element of a quaternion
//
inline float getY( ) const;
// Get the z element of a quaternion
//
inline float getZ( ) const;
// Get the w element of a quaternion
//
inline float getW( ) const;
// Set an x, y, z, or w element of a quaternion by index
//
inline Quat_ref & setElem( int idx, float value );
// Get an x, y, z, or w element of a quaternion by index
//
inline float getElem( int idx ) const;
// Subscripting operator to set or get an element
//
inline float & operator []( int idx );
// Subscripting operator to get an element
//
inline float operator []( int idx ) const;
// Add two quaternions
//
inline const Quat_ref operator +( const Quat_ref & quat ) const;
// Subtract a quaternion from another quaternion
//
inline const Quat_ref operator -( const Quat_ref & quat ) const;
// Multiply two quaternions
//
inline const Quat_ref operator *( const Quat_ref & quat ) const;
// Multiply a quaternion by a scalar
//
inline const Quat_ref operator *( float scalar ) const;
// Divide a quaternion by a scalar
//
inline const Quat_ref operator /( float scalar ) const;
// Perform compound assignment and addition with a quaternion
//
inline Quat_ref & operator +=( const Quat_ref & quat );
// Perform compound assignment and subtraction by a quaternion
//
inline Quat_ref & operator -=( const Quat_ref & quat );
// Perform compound assignment and multiplication by a quaternion
//
inline Quat_ref & operator *=( const Quat_ref & quat );
// Perform compound assignment and multiplication by a scalar
//
inline Quat_ref & operator *=( float scalar );
// Perform compound assignment and division by a scalar
//
inline Quat_ref & operator /=( float scalar );
// Negate all elements of a quaternion
//
inline const Quat_ref operator -( ) const;
// Construct an identity quaternion
//
static inline const Quat_ref identity( );
// Construct a quaternion to rotate between two unit-length 3-D vectors
// NOTE:
// The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
//
static inline const Quat_ref rotation( const vmVector3 & unitVec0, const vmVector3 & unitVec1 );
// Construct a quaternion to rotate around a unit-length 3-D vector
//
static inline const Quat_ref rotation( float radians, const vmVector3 & unitVec );
// Construct a quaternion to rotate around the x axis
//
static inline const Quat_ref rotationX( float radians );
// Construct a quaternion to rotate around the y axis
//
static inline const Quat_ref rotationY( float radians );
// Construct a quaternion to rotate around the z axis
//
static inline const Quat_ref rotationZ( float radians );
};
// Copy constructor: duplicates all four components of quat.
inline Quat_ref::Quat_ref( const Quat_ref & quat )
    : mX( quat.mX ), mY( quat.mY ), mZ( quat.mZ ), mW( quat.mW )
{
}
// Builds a quaternion from its four components.
inline Quat_ref::Quat_ref( float _x, float _y, float _z, float _w )
    : mX( _x ), mY( _y ), mZ( _z ), mW( _w )
{
}
// Builds a quaternion whose x, y, z come from xyz and whose w is _w.
inline Quat_ref::Quat_ref( const vmVector3 & xyz, float _w )
{
    // setXYZ returns *this, so the two setters can be chained
    setXYZ( xyz ).setW( _w );
}
// Copies the four elements of a 4-D vector into the quaternion.
inline Quat_ref::Quat_ref( const vmVector4 & vec )
{
    this->mX = vec.getX();
    this->mY = vec.getY();
    this->mZ = vec.getZ();
    this->mW = vec.getW();
}
// Sets every component to the same scalar value.
inline Quat_ref::Quat_ref( float scalar )
    : mX( scalar ), mY( scalar ), mZ( scalar ), mW( scalar )
{
}
// Returns the identity quaternion (0, 0, 0, 1).
inline const Quat_ref Quat_ref::identity( )
{
    const Quat_ref unit( 0.0f, 0.0f, 0.0f, 1.0f );
    return unit;
}
// Loads quat from four consecutive floats at fptr, in x, y, z, w order.
inline void loadXYZW_ref( Quat_ref & quat, const float * fptr )
{
    const Quat_ref loaded( fptr[0], fptr[1], fptr[2], fptr[3] );
    quat = loaded;
}
// Stores quat to four consecutive floats at fptr, in x, y, z, w order.
inline void storeXYZW_ref( const Quat_ref & quat, float * fptr )
{
    float * out = fptr;
    *out++ = quat.getX();
    *out++ = quat.getY();
    *out++ = quat.getZ();
    *out = quat.getW();
}
// Component-wise assignment; returns *this.
inline Quat_ref & Quat_ref::operator =( const Quat_ref & quat )
{
    this->mX = quat.mX;
    this->mY = quat.mY;
    this->mZ = quat.mZ;
    this->mW = quat.mW;
    return *this;
}
// Overwrites x, y and z from vec; w is left untouched. Returns *this.
inline Quat_ref & Quat_ref::setXYZ( const vmVector3 & vec )
{
    this->mX = vec.getX();
    this->mY = vec.getY();
    this->mZ = vec.getZ();
    return *this;
}
// Returns the x, y, z components as a 3-D vector.
inline const vmVector3 Quat_ref::getXYZ( ) const
{
    const vmVector3 xyz( mX, mY, mZ );
    return xyz;
}
// Sets the x component; returns *this so calls can be chained.
inline Quat_ref & Quat_ref::setX( float _x )
{
    this->mX = _x;
    return *this;
}
inline float Quat_ref::getX( ) const
{
return mX;
}
// Sets the y component; returns *this so calls can be chained.
inline Quat_ref & Quat_ref::setY( float _y )
{
    this->mY = _y;
    return *this;
}
inline float Quat_ref::getY( ) const
{
return mY;
}
// Sets the z component; returns *this so calls can be chained.
inline Quat_ref & Quat_ref::setZ( float _z )
{
    this->mZ = _z;
    return *this;
}
inline float Quat_ref::getZ( ) const
{
return mZ;
}
// Sets the w component; returns *this so calls can be chained.
inline Quat_ref & Quat_ref::setW( float _w )
{
    this->mW = _w;
    return *this;
}
inline float Quat_ref::getW( ) const
{
return mW;
}
// Writes the component selected by idx (0 = x ... 3 = w); idx is not range-checked.
// Relies on mX, mY, mZ, mW being laid out consecutively.
inline Quat_ref & Quat_ref::setElem( int idx, float value )
{
    float * const base = &mX;
    base[idx] = value;
    return *this;
}
// Reads the component selected by idx (0 = x ... 3 = w); idx is not range-checked.
// Relies on mX, mY, mZ, mW being laid out consecutively.
inline float Quat_ref::getElem( int idx ) const
{
    const float * const base = &mX;
    return base[idx];
}
// Mutable access to the component selected by idx (0 = x ... 3 = w); not range-checked.
// Relies on mX, mY, mZ, mW being laid out consecutively.
inline float & Quat_ref::operator []( int idx )
{
    float * const base = &mX;
    return base[idx];
}
// Read-only access to the component selected by idx (0 = x ... 3 = w); not range-checked.
// Relies on mX, mY, mZ, mW being laid out consecutively.
inline float Quat_ref::operator []( int idx ) const
{
    const float * const base = &mX;
    return base[idx];
}
// Component-wise sum of *this and quat.
inline const Quat_ref Quat_ref::operator +( const Quat_ref & quat ) const
{
    const float sx = mX + quat.mX;
    const float sy = mY + quat.mY;
    const float sz = mZ + quat.mZ;
    const float sw = mW + quat.mW;
    return Quat_ref( sx, sy, sz, sw );
}
// Component-wise difference *this - quat.
inline const Quat_ref Quat_ref::operator -( const Quat_ref & quat ) const
{
    const float dx = mX - quat.mX;
    const float dy = mY - quat.mY;
    const float dz = mZ - quat.mZ;
    const float dw = mW - quat.mW;
    return Quat_ref( dx, dy, dz, dw );
}
// Scales every component by scalar.
inline const Quat_ref Quat_ref::operator *( float scalar ) const
{
    const float sx = mX * scalar;
    const float sy = mY * scalar;
    const float sz = mZ * scalar;
    const float sw = mW * scalar;
    return Quat_ref( sx, sy, sz, sw );
}
// Adds quat to *this in place; returns *this.
inline Quat_ref & Quat_ref::operator +=( const Quat_ref & quat )
{
    return *this = *this + quat;
}
// Subtracts quat from *this in place; returns *this.
inline Quat_ref & Quat_ref::operator -=( const Quat_ref & quat )
{
    return *this = *this - quat;
}
// Scales *this by scalar in place; returns *this.
inline Quat_ref & Quat_ref::operator *=( float scalar )
{
    return *this = *this * scalar;
}
// Divides every component by scalar (a true division per component, no
// reciprocal multiply). No guard against scalar == 0.
inline const Quat_ref Quat_ref::operator /( float scalar ) const
{
    const float qx = mX / scalar;
    const float qy = mY / scalar;
    const float qz = mZ / scalar;
    const float qw = mW / scalar;
    return Quat_ref( qx, qy, qz, qw );
}
// Divides *this by scalar in place; returns *this.
inline Quat_ref & Quat_ref::operator /=( float scalar )
{
    return *this = *this / scalar;
}
// Unary minus: negates all four components.
inline const Quat_ref Quat_ref::operator -( ) const
{
    const Quat_ref negated( -mX, -mY, -mZ, -mW );
    return negated;
}
// scalar * quat, forwarded to Quat_ref::operator *( float ).
inline const Quat_ref operator *( float scalar, const Quat_ref & quat )
{
    return quat.operator *( scalar );
}
// Four-component dot product, accumulated in x, y, z, w order.
inline float dot( const Quat_ref & quat0, const Quat_ref & quat1 )
{
    float sum = ( quat0.getX() * quat1.getX() );
    sum += ( quat0.getY() * quat1.getY() );
    sum += ( quat0.getZ() * quat1.getZ() );
    sum += ( quat0.getW() * quat1.getW() );
    return sum;
}
// Linear interpolation: quat0 + (quat1 - quat0) * t.
// The result is not renormalized.
inline const Quat_ref lerp( float t, const Quat_ref & quat0, const Quat_ref & quat1 )
{
    const Quat_ref delta = quat1 - quat0;
    return quat0 + ( delta * t );
}
// Spherical linear interpolation between unitQuat0 (t = 0) and unitQuat1 (t = 1).
//
// If the dot product of the inputs is negative, unitQuat0 is negated first so
// that the interpolation uses a non-negative cosine. When that cosine is below
// _VECTORMATH_SLERP_TOL the sine-based weights are used; otherwise the weights
// fall back to plain linear ones (1 - t, t) -- presumably to avoid dividing by
// a near-zero sine when the inputs are almost parallel.
inline const Quat_ref slerp( float t, const Quat_ref & unitQuat0, const Quat_ref & unitQuat1 )
{
    float cosTheta = dot( unitQuat0, unitQuat1 );

    Quat_ref from;
    if ( cosTheta < 0.0f ) {
        cosTheta = -cosTheta;
        from = ( -unitQuat0 );
    } else {
        from = unitQuat0;
    }

    // Start from the linear weights and override them in the general case.
    float weight0 = ( 1.0f - t );
    float weight1 = t;
    if ( cosTheta < _VECTORMATH_SLERP_TOL ) {
        const float theta = acosf( cosTheta );
        const float invSinTheta = ( 1.0f / sinf( theta ) );
        weight0 = ( sinf( ( ( 1.0f - t ) * theta ) ) * invSinTheta );
        weight1 = ( sinf( ( t * theta ) ) * invSinTheta );
    }

    return ( ( from * weight0 ) + ( unitQuat1 * weight1 ) );
}
// Spherical quadrangle interpolation built from three slerps:
// slerp(unitQuat0, unitQuat3) and slerp(unitQuat1, unitQuat2) at parameter t,
// blended by a final slerp at parameter 2t(1 - t).
inline const Quat_ref squad( float t, const Quat_ref & unitQuat0, const Quat_ref & unitQuat1, const Quat_ref & unitQuat2, const Quat_ref & unitQuat3 )
{
    const Quat_ref outer = slerp( t, unitQuat0, unitQuat3 );
    const Quat_ref inner = slerp( t, unitQuat1, unitQuat2 );
    const float blend = ( ( 2.0f * t ) * ( 1.0f - t ) );
    return slerp( blend, outer, inner );
}
// Returns x*x + y*y + z*z + w*w, i.e. the squared magnitude of quat
// (no square root is taken here; see length()).
inline float norm( const Quat_ref & quat )
{
    return ( ( ( quat.getX() * quat.getX() )
             + ( quat.getY() * quat.getY() ) )
             + ( quat.getZ() * quat.getZ() ) )
             + ( quat.getW() * quat.getW() );
}
// Magnitude of quat: the square root of norm(quat).
inline float length( const Quat_ref & quat )
{
    const float lenSqr = norm( quat );
    return ::sqrtf( lenSqr );
}
// Returns quat scaled by 1 / sqrt(norm(quat)).
// A zero quaternion is not special-cased (the reciprocal divides by zero).
inline const Quat_ref normalize( const Quat_ref & quat )
{
    const float invLen = ( 1.0f / sqrtf( norm( quat ) ) );
    const float nx = ( quat.getX() * invLen );
    const float ny = ( quat.getY() * invLen );
    const float nz = ( quat.getZ() * invLen );
    const float nw = ( quat.getW() * invLen );
    return Quat_ref( nx, ny, nz, nw );
}
// Builds a quaternion from two vectors: the vector argument passed to the
// constructor is cross(unitVec0, unitVec1) / sqrt(2 * (1 + dot)), and the
// scalar argument is sqrt(2 * (1 + dot)) / 2.
// When dot(unitVec0, unitVec1) == -1 the square root is zero and the
// reciprocal divides by zero; that case is not handled here.
// NOTE(review): presumably the (vector, scalar) constructor stores xyz and w
// respectively -- the constructor is not visible here; confirm.
inline const Quat_ref Quat_ref::rotation( const vmVector3 & unitVec0, const vmVector3 & unitVec1 )
{
    const float twoCosHalf = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) );
    const float invTwoCosHalf = ( 1.0f / twoCosHalf );
    return Quat_ref( ( cross( unitVec0, unitVec1 ) * invTwoCosHalf ), ( twoCosHalf * 0.5f ) );
}
// Axis-angle constructor: passes unitVec * sin(radians / 2) as the vector
// argument and cos(radians / 2) as the scalar argument.
inline const Quat_ref Quat_ref::rotation( float radians, const vmVector3 & unitVec )
{
    const float halfAngle = ( radians * 0.5f );
    const float sinHalf = sinf( halfAngle );
    const float cosHalf = cosf( halfAngle );
    return Quat_ref( ( unitVec * sinHalf ), cosHalf );
}
// Returns ( sin(radians/2), 0, 0, cos(radians/2) ).
inline const Quat_ref Quat_ref::rotationX( float radians )
{
    const float halfAngle = ( radians * 0.5f );
    const float sinHalf = sinf( halfAngle );
    const float cosHalf = cosf( halfAngle );
    return Quat_ref( sinHalf, 0.0f, 0.0f, cosHalf );
}
// Returns ( 0, sin(radians/2), 0, cos(radians/2) ).
inline const Quat_ref Quat_ref::rotationY( float radians )
{
    const float halfAngle = ( radians * 0.5f );
    const float sinHalf = sinf( halfAngle );
    const float cosHalf = cosf( halfAngle );
    return Quat_ref( 0.0f, sinHalf, 0.0f, cosHalf );
}
// Returns ( 0, 0, sin(radians/2), cos(radians/2) ).
inline const Quat_ref Quat_ref::rotationZ( float radians )
{
    const float halfAngle = ( radians * 0.5f );
    const float sinHalf = sinf( halfAngle );
    const float cosHalf = cosf( halfAngle );
    return Quat_ref( 0.0f, 0.0f, sinHalf, cosHalf );
}
// Quaternion (Hamilton) product: this * quat.
// Each component keeps the exact term order of the reference expression so the
// floating-point result is unchanged.
inline const Quat_ref Quat_ref::operator *( const Quat_ref & quat ) const
{
    const float prodX =
        ( ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY ) );
    const float prodY =
        ( ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ ) );
    const float prodZ =
        ( ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX ) );
    const float prodW =
        ( ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ ) );
    return Quat_ref( prodX, prodY, prodZ, prodW );
}
// In-place quaternion product: *this becomes (*this) * quat. Returns *this.
inline Quat_ref & Quat_ref::operator *=( const Quat_ref & quat )
{
    const Quat_ref product( *this * quat );
    *this = product;
    return *this;
}
// Applies quat to vec in two stages: first an intermediate four-component
// product of quat with vec (t*), then the combination of that product with
// the components of quat with the signs used below, yielding a 3-vector.
// NOTE(review): this matches the usual q * v * conj(q) expansion and so
// presumably expects a unit quaternion -- confirm with callers.
inline const vmVector3 rotate( const Quat_ref & quat, const vmVector3 & vec )
{
    const float qx = quat.getX();
    const float qy = quat.getY();
    const float qz = quat.getZ();
    const float qw = quat.getW();

    const float tX = ( ( ( qw * vec.getX() ) + ( qy * vec.getZ() ) ) - ( qz * vec.getY() ) );
    const float tY = ( ( ( qw * vec.getY() ) + ( qz * vec.getX() ) ) - ( qx * vec.getZ() ) );
    const float tZ = ( ( ( qw * vec.getZ() ) + ( qx * vec.getY() ) ) - ( qy * vec.getX() ) );
    const float tW = ( ( ( qx * vec.getX() ) + ( qy * vec.getY() ) ) + ( qz * vec.getZ() ) );

    return vmVector3(
        ( ( ( ( tW * qx ) + ( tX * qw ) ) - ( tY * qz ) ) + ( tZ * qy ) ),
        ( ( ( ( tW * qy ) + ( tY * qw ) ) - ( tZ * qx ) ) + ( tX * qz ) ),
        ( ( ( ( tW * qz ) + ( tZ * qw ) ) - ( tX * qy ) ) + ( tY * qx ) )
    );
}
// Conjugate: negates x, y and z and keeps w unchanged.
inline const Quat_ref conj( const Quat_ref & quat )
{
    const float cx = -quat.getX();
    const float cy = -quat.getY();
    const float cz = -quat.getZ();
    return Quat_ref( cx, cy, cz, quat.getW() );
}
// Returns a copy of quat1 when select1 is true, otherwise a copy of quat0
// (built component by component).
inline const Quat_ref select( const Quat_ref & quat0, const Quat_ref & quat1, bool select1 )
{
    const Quat_ref & chosen = select1 ? quat1 : quat0;
    return Quat_ref( chosen.getX(), chosen.getY(), chosen.getZ(), chosen.getW() );
}
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
#define DATA_SIZE 1024
// Placeholder test entry point: performs no checks and always reports success
// (the test convention is a non-zero return on failure).
int Test_quat_aos_neon(void)
{
    const int kTestPassed = 0;
    return kTestPassed;
}
#endif

View File

@@ -0,0 +1,21 @@
//
// Test_quat_aos_neon.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_quat_aos_neon_h
#define BulletTest_Test_quat_aos_neon_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_quat_aos_neon(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,181 @@
//
// Test_v3cross.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3cross.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static btVector3& v3cross_ref(btVector3& v1, btVector3& v2);
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
// Checks btVector3::cross() against the scalar reference v3cross_ref() on one
// random input pair, then times both implementations.
//
// Returns 1 (after logging both results) when the summed absolute difference
// of the x/y/z components exceeds FLT_EPSILON * 4, otherwise 0.
int Test_v3cross(void)
{
btVector3 v1, v2, v3;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
v2.setW(w);
// v3cross_ref() overwrites its first argument, so the tested call below works
// on this copy of the original v1.
v3 = v1;
btVector3 correct_res, test_res;
{
// Pre-fill both results with NaN before they are overwritten by the calls.
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3cross_ref(v1, v2);
test_res = v3.cross(v2);
// Only x, y and z are compared; w is not part of the check.
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3cross result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Timing pass 1: scalar reference implementation.
{
uint64_t startTime, bestTime, currentTime;
// -1 converted to uint64_t is the largest value, so the first measured cycle
// always becomes the initial best time.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// Fresh random inputs for every cycle; generated before the clock starts so
// setup cost is excluded from the measurement.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr1[k] = v3cross_ref(vec3_arr1[k], vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// gReportAverageTimes == 0 reports the fastest cycle; any other value reports
// the mean over NUM_CYCLES.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Timing pass 2: btVector3::cross(), measured the same way as pass 1.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr1[k] = vec3_arr1[k].cross(vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Report the cost per operation: the selected time divided by LOOPCOUNT.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
// Scalar reference cross product: overwrites the x/y/z components of v1 with
// v1 x v2 and returns v1. The w component is left untouched.
static btVector3& v3cross_ref(btVector3& v1, btVector3& v2)
{
    // All three components are computed before any is stored, because v1 is
    // both an input and the output.
    const btScalar crossX = v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1];
    const btScalar crossY = v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2];
    const btScalar crossZ = v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0];

    v1.m_floats[0] = crossX;
    v1.m_floats[1] = crossY;
    v1.m_floats[2] = crossZ;
    return v1;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3cross.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3cross_h
#define BulletTest_Test_v3cross_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3cross(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,178 @@
//
// Test_v3div.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3div.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
#define BT_OP(a, b) ((a) / (b))
// reference code for testing purposes
// Scalar reference for component-wise division: stores v1 / v2 into the
// x, y and z components of v0 and returns v0. The w component of v0 is left
// untouched, and no divisor is checked for zero.
//
// (The earlier two-argument forward declaration was dropped: it did not match
// this three-argument definition and was never defined or called.)
static btVector3& v3div_ref(btVector3& v0, btVector3& v1, btVector3& v2)
{
    v0.m_floats[0] = BT_OP(v1.m_floats[0] , v2.m_floats[0]);
    v0.m_floats[1] = BT_OP(v1.m_floats[1] , v2.m_floats[1]);
    v0.m_floats[2] = BT_OP(v1.m_floats[2] , v2.m_floats[2]);
    return v0;
}
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
// Checks btVector3 component-wise division (v3 / v2 via BT_OP) against the
// scalar reference v3div_ref() on one random input pair, then times both.
//
// Returns 1 (after logging both results) when the summed absolute difference
// of the x/y/z components exceeds FLT_EPSILON*10, otherwise 0.
int Test_v3div(void)
{
btVector3 v1, v2, v3;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
v2.setW(w);
// The tested expression divides this copy of v1.
v3 = v1;
btVector3 correct_res, test_res;
{
// Pre-fill both results with NaN before they are overwritten by the calls.
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3div_ref(correct_res, v1, v2);
test_res = BT_OP(v3,v2);
// Only x, y and z are compared; w is not part of the check.
if( fabsf(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabsf(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabsf(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON*10 )
{
vlog( "Error - v3div result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr0[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Timing pass 1: scalar reference implementation.
{
uint64_t startTime, bestTime, currentTime;
w = BT_NAN; // w channel NaN
// -1 converted to uint64_t is the largest value, so the first measured cycle
// always becomes the initial best time.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// Fresh random inputs for every cycle; generated before the clock starts so
// setup cost is excluded from the measurement.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr0[k] = v3div_ref(vec3_arr0[k], vec3_arr1[k], vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// gReportAverageTimes == 0 reports the fastest cycle; any other value reports
// the mean over NUM_CYCLES.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Timing pass 2: btVector3 operator/ (through BT_OP), measured the same way.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr0[k] = BT_OP(vec3_arr1[k] , vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Report the cost per operation: the selected time divided by LOOPCOUNT.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3div.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3div_h
#define BulletTest_Test_v3div_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3div(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,164 @@
//
// Test_v3dot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3dot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btScalar v3dot_ref(
const btVector3& v1,
const btVector3& v2);
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
// Checks btVector3::dot() against the scalar reference v3dot_ref() on one
// random input pair, then times both implementations.
//
// Returns 1 (after logging both values) when the two dot products differ by
// more than FLT_EPSILON * 4, otherwise 0.
int Test_v3dot(void)
{
btVector3 v1, v2;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
v2.setW(w);
float correctDot0, testDot0;
{
// Both results start as NaN (w) before being overwritten by the calls.
correctDot0 = w;
testDot0 = w; ;
correctDot0 = v3dot_ref(v1, v2);
testDot0 = v1.dot(v2);
if( fabsf(correctDot0 - testDot0) > FLT_EPSILON * 4 )
{
vlog( "Error - v3dot result error! %f != %f \n", correctDot0, testDot0);
return 1;
}
}
#define DATA_SIZE 1024
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
btScalar res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Unlike the per-cycle regeneration used elsewhere, the inputs here are
// generated once and shared by both timing passes.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
res_arr[k] = w;
}
// Timing pass 1: scalar reference implementation.
{
uint64_t startTime, bestTime, currentTime;
// -1 converted to uint64_t is the largest value, so the first measured cycle
// always becomes the initial best time.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
// Manually unrolled by four. The mask wraps the index into the arrays
// (DATA_SIZE is 1024); with LOOPCOUNT at 1000 it never actually wraps.
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t k32 = (k & (DATA_SIZE-1));
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]); k32++;
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]); k32++;
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]); k32++;
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// gReportAverageTimes == 0 reports the fastest cycle; any other value reports
// the mean over NUM_CYCLES.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Timing pass 2: btVector3::dot(), measured the same way as pass 1.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t k32 = k & (DATA_SIZE -1);
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]); k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]); k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]); k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Report the cost per operation: the selected time divided by LOOPCOUNT.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
// Scalar reference dot product over the x, y and z components only
// (w is ignored). The products are summed in x, y, z order.
static btScalar v3dot_ref(const btVector3& v1,
                          const btVector3& v2)
{
    const btScalar* const a = v1.m_floats;
    const btScalar* const b = v2.m_floats;
    return (a[0] * b[0] +
            a[1] * b[1] +
            a[2] * b[2]);
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3dot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3dot_h
#define BulletTest_Test_v3dot_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3dot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,195 @@
//
// Test_v3interp.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3interp.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3& v3interp_ref(
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt);
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
// Checks btVector3::setInterpolate3() against the scalar reference
// v3interp_ref() for interpolation factors 0.0, 0.1, ..., 1.0 (fresh random
// endpoints per factor), then times both implementations.
//
// Returns 1 (after logging both results and the factor) as soon as the summed
// absolute difference of the x/y/z components exceeds FLT_EPSILON * 4,
// otherwise 0.
int Test_v3interp(void)
{
    btVector3 v1, v2;
    btScalar rt;

    float x,y,z,w;

    float vNaN = BT_NAN;
    w = BT_NAN;   // w channel NaN

    btVector3 correct_res, test_res;

    // The sweep is driven by an integer counter. Accumulating 0.1f in a
    // floating-point loop variable overshoots 1.0 after ten additions, which
    // silently skipped the rt == 1 endpoint; deriving rt from the step index
    // gives exactly 0 and exactly 1 at the ends.
    const int kNumSteps = 10;
    for (int step = 0; step <= kNumSteps; step++)
    {
        rt = btScalar(step) / btScalar(kNumSteps);

        // Pre-fill both results with NaN before they are overwritten.
        correct_res.setValue(vNaN, vNaN, vNaN);
        test_res.setValue(vNaN, vNaN, vNaN);

        // Init the data
        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v1.setValue(x,y,z);
        v1.setW(w);

        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v2.setValue(x,y,z);
        v2.setW(w);

        correct_res = v3interp_ref(correct_res, v1, v2, rt);
        test_res.setInterpolate3(v1, v2, rt);

        // Only x, y and z are compared; w is not part of the check.
        if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
            fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
            fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
        {
            vlog( "Error - v3interp result error! "
                  "\ncorrect = (%10.4f, %10.4f, %10.4f) "
                  "\ntested = (%10.4f, %10.4f, %10.4f) \n"
                  "\n rt=%10.4f",
                  correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
                  test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2], rt);
            return 1;
        }
    }

#define DATA_SIZE LOOPCOUNT
    btVector3 vec3_arr1[DATA_SIZE];
    btVector3 vec3_arr2[DATA_SIZE];
    btScalar rt_arr[DATA_SIZE];

    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j, k;

    // Timing pass 1: scalar reference implementation.
    {
        uint64_t startTime, bestTime, currentTime;

        // -1 converted to uint64_t is the largest value, so the first measured
        // cycle always becomes the initial best time.
        bestTime = -1LL;
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            // Fresh random inputs for every cycle; generated before the clock
            // starts so setup cost is excluded from the measurement.
            for( k = 0; k < DATA_SIZE; k++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr1[k].setValue(x,y,z);
                vec3_arr1[k].setW(w);

                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr2[k].setValue(x,y,z);
                vec3_arr2[k].setW(w);

                rt_arr[k] = RANDF_01;
            }

            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
            {
                // Result is written in place over the first input.
                v3interp_ref(vec3_arr1[k], vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
            }
            currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        // gReportAverageTimes == 0 reports the fastest cycle; any other value
        // reports the mean over NUM_CYCLES.
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;
        else
            scalarTime /= NUM_CYCLES;
    }

    // Timing pass 2: btVector3::setInterpolate3(), measured the same way.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = -1LL;
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            for( k = 0; k < DATA_SIZE; k++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr1[k].setValue(x,y,z);
                vec3_arr1[k].setW(w);

                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr2[k].setValue(x,y,z);
                vec3_arr2[k].setW(w);

                rt_arr[k] = RANDF_01;
            }

            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
            {
                vec3_arr1[k].setInterpolate3(vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
            }
            currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;
        else
            vectorTime /= NUM_CYCLES;
    }

    // Report the cost per operation: the selected time divided by LOOPCOUNT.
    vlog( "Timing:\n" );
    vlog( " \t scalar\t vector\n" );
    vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
          TicksToCycles( vectorTime ) / LOOPCOUNT );

    return 0;
}
// Scalar reference interpolation: for x, y and z stores
// (1 - rt) * v0 + rt * v1 into vr and returns vr. The w component of vr is
// left untouched. vr may alias v0: each component is read before it is
// written.
static btVector3&
v3interp_ref(
    btVector3& vr,
    btVector3& v0,
    btVector3& v1,
    btScalar& rt)
{
    const btScalar s = btScalar(1.0) - rt;
    for (int axis = 0; axis < 3; ++axis)
    {
        vr.m_floats[axis] = s * v0.m_floats[axis] + rt * v1.m_floats[axis];
    }
    return vr;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3interp.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3interp_h
#define BulletTest_Test_v3interp_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3interp(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,198 @@
//
// Test_v3lerp.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3lerp.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3&
v3lerp_ref(
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt);
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
// Checks btVector3::lerp() against the scalar reference v3lerp_ref() for
// interpolation factors 0.0, 0.1, ..., 1.0 (fresh random endpoints per
// factor), then times both implementations.
//
// Returns 1 (after logging both results and the factor) as soon as the summed
// absolute difference of the x/y/z components exceeds FLT_EPSILON * 4,
// otherwise 0.
int Test_v3lerp(void)
{
    btVector3 v1, v2;
    btScalar rt;

    float x,y,z,w;

    float vNaN =BT_NAN;
    w =BT_NAN;   // w channel NaN

    btVector3 correct_res, test_res;

    // The sweep is driven by an integer counter. Accumulating 0.1f in a
    // floating-point loop variable overshoots 1.0 after ten additions, which
    // silently skipped the rt == 1 endpoint; deriving rt from the step index
    // gives exactly 0 and exactly 1 at the ends.
    const int kNumSteps = 10;
    for (int step = 0; step <= kNumSteps; step++)
    {
        rt = btScalar(step) / btScalar(kNumSteps);

        // Pre-fill both results with NaN before they are overwritten.
        correct_res.setValue(vNaN, vNaN, vNaN);
        test_res.setValue(vNaN, vNaN, vNaN);

        // Init the data
        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v1.setValue(x,y,z);
        v1.setW(w);

        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v2.setValue(x,y,z);
        v2.setW(w);

        correct_res = v3lerp_ref(correct_res, v1, v2, rt);
        test_res = v1.lerp(v2, rt);

        // Only x, y and z are compared; w is not part of the check.
        if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
            fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
            fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
        {
            vlog( "Error - v3lerp result error! "
                  "\ncorrect = (%10.4f, %10.4f, %10.4f) "
                  "\ntested = (%10.4f, %10.4f, %10.4f) \n"
                  "\n rt=%10.4f",
                  correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
                  test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2], rt);
            return 1;
        }
    }

#define DATA_SIZE LOOPCOUNT
    btVector3 vec3_arr1[DATA_SIZE];
    btVector3 vec3_arr2[DATA_SIZE];
    btScalar rt_arr[DATA_SIZE];

    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j, k;

    // Timing pass 1: scalar reference implementation.
    {
        uint64_t startTime, bestTime, currentTime;

        w =BT_NAN;   // w channel NaN

        // -1 converted to uint64_t is the largest value, so the first measured
        // cycle always becomes the initial best time.
        bestTime = -1LL;
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            // Fresh random inputs for every cycle; generated before the clock
            // starts so setup cost is excluded from the measurement.
            for( k = 0; k < DATA_SIZE; k++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr1[k].setValue(x,y,z);
                vec3_arr1[k].setW(w);

                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr2[k].setValue(x,y,z);
                vec3_arr2[k].setW(w);

                rt_arr[k] = RANDF_01;
            }

            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
            {
                // Result is written in place over the first input.
                v3lerp_ref(vec3_arr1[k], vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
            }
            currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        // gReportAverageTimes == 0 reports the fastest cycle; any other value
        // reports the mean over NUM_CYCLES.
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;
        else
            scalarTime /= NUM_CYCLES;
    }

    // Timing pass 2: btVector3::lerp(), measured the same way as pass 1.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = -1LL;
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            for( k = 0; k < DATA_SIZE; k++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr1[k].setValue(x,y,z);
                vec3_arr1[k].setW(w);

                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr2[k].setValue(x,y,z);
                vec3_arr2[k].setW(w);

                rt_arr[k] = RANDF_01;
            }

            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
            {
                vec3_arr1[k] = vec3_arr1[k].lerp(vec3_arr2[k], rt_arr[k]);
            }
            currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;
        else
            vectorTime /= NUM_CYCLES;
    }

    // Report the cost per operation: the selected time divided by LOOPCOUNT.
    vlog( "Timing:\n" );
    vlog( " \t scalar\t vector\n" );
    vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
          TicksToCycles( vectorTime ) / LOOPCOUNT );

    return 0;
}
// Scalar reference lerp: for x, y and z stores v0 + rt * (v1 - v0) into vr
// and returns vr. The w component of vr is left untouched. vr may alias v0:
// each component is read before it is written.
static
btVector3&
v3lerp_ref(
    btVector3& vr,
    btVector3& v0,
    btVector3& v1,
    btScalar& rt)
{
    for (int axis = 0; axis < 3; ++axis)
    {
        vr.m_floats[axis] = v0.m_floats[axis] + rt * (v1.m_floats[axis] - v0.m_floats[axis]);
    }
    return vr;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3lerp.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3lerp_h
#define BulletTest_Test_v3lerp_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3lerp(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,170 @@
//
// Test_v3norm.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3norm.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline btVector3& v3norm_ref(btVector3& v);
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
// Checks btVector3::normalize() against the scalar reference v3norm_ref() on
// one random input, then times both implementations.
//
// Returns 1 (after logging both results) when the summed absolute difference
// of the x/y/z components exceeds FLT_EPSILON * 4, otherwise 0.
int Test_v3norm(void)
{
btVector3 v1, v2;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
v1.setW(w);
// v3norm_ref() normalizes its argument in place, so the tested call below
// works on this copy of the original v1.
v2 = v1;
btVector3 correct_res, test_res;
{
// Pre-fill both results with NaN before they are overwritten by the calls.
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3norm_ref(v1);
test_res = v2.normalize();
// Only x, y and z are compared; w is not part of the check.
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3norm result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr0[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Timing pass 1: scalar reference implementation.
{
uint64_t startTime, bestTime, currentTime;
// -1 converted to uint64_t is the largest value, so the first measured cycle
// always becomes the initial best time.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// Fresh random inputs for every cycle; generated before the clock starts so
// setup cost is excluded from the measurement.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
}
startTime = ReadTicks();
// Manually unrolled by four.
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
vec3_arr0[k] = v3norm_ref(vec3_arr1[k]);
vec3_arr0[k+1] = v3norm_ref(vec3_arr1[k+1]);
vec3_arr0[k+2] = v3norm_ref(vec3_arr1[k+2]);
vec3_arr0[k+3] = v3norm_ref(vec3_arr1[k+3]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// gReportAverageTimes == 0 reports the fastest cycle; any other value reports
// the mean over NUM_CYCLES.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Timing pass 2: btVector3::normalize(), measured the same way as pass 1.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
vec3_arr0[k] = vec3_arr1[k].normalize();
vec3_arr0[k+1] = vec3_arr1[k+1].normalize();
vec3_arr0[k+2] = vec3_arr1[k+2].normalize();
vec3_arr0[k+3] = vec3_arr1[k+3].normalize();
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Report the cost per operation: the selected time divided by LOOPCOUNT.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
// Scalar reference normalization: scales the x, y and z components of v in
// place by 1 / sqrt(x*x + y*y + z*z) and returns v. The w component is left
// untouched and a zero-length input is not special-cased.
static btVector3& v3norm_ref(btVector3& v)
{
    const float lenSqr = v.m_floats[0] * v.m_floats[0] +
                         v.m_floats[1] * v.m_floats[1] +
                         v.m_floats[2] * v.m_floats[2];
    const float invLen = 1.0f / sqrtf(lenSqr);

    for (int axis = 0; axis < 3; ++axis)
    {
        v.m_floats[axis] *= invLen;
    }
    return v;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3norm.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3norm_h
#define BulletTest_Test_v3norm_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3norm(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,194 @@
//
// Test_v3rotate.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3rotate.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3& v3rotate_ref(
btVector3& v0,
btVector3& v1,
const btScalar& s);
#define LOOPCOUNT 2048
#define NUM_CYCLES 1000
// Checks btVector3::rotate() against the scalar reference v3rotate_ref() on
// one random vector/axis/angle triple, then times both implementations.
//
// Returns 1 (after logging both results) when the summed absolute difference
// of the x/y/z components exceeds FLT_EPSILON * 4, otherwise 0.
int Test_v3rotate(void)
{
btVector3 v1, v2;
float s;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
// v2 is used as the rotation axis below.
// NOTE(review): v2 is not normalized before use; confirm whether
// btVector3::rotate() expects a unit-length axis.
v2.setValue(x,y,z);
v2.setW(w);
s = RANDF_01 * (float) SIMD_PI;
btVector3 correct_res, test_res;
{
// Pre-fill both results with NaN before they are overwritten by the calls.
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
// The tested call must run first: v3rotate_ref() overwrites v1 with its result.
test_res = v1.rotate(v2, s);
correct_res = v3rotate_ref(v1, v2, s);
// Only x, y and z are compared; w is not part of the check.
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3rotate result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr0[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
btScalar s_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Timing pass 1: scalar reference implementation.
{
uint64_t startTime, bestTime, currentTime;
// -1 converted to uint64_t is the largest value, so the first measured cycle
// always becomes the initial best time.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// Fresh random inputs for every cycle; generated before the clock starts so
// setup cost is excluded from the measurement.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr0[k].setValue(x,y,z);
vec3_arr0[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
s_arr[k] = RANDF_01 * (float)SIMD_PI;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr0[k] = v3rotate_ref(vec3_arr0[k], vec3_arr1[k], s_arr[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// gReportAverageTimes == 0 reports the fastest cycle; any other value reports
// the mean over NUM_CYCLES.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Timing pass 2: btVector3::rotate(), measured the same way as pass 1.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr0[k].setValue(x,y,z);
vec3_arr0[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
s_arr[k] = RANDF_01 * (float)SIMD_PI;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr0[k ] = vec3_arr0[k ].rotate(vec3_arr1[k ], s_arr[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Report the cost per operation: the selected time divided by LOOPCOUNT.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
// Reference rotation of v0 about wAxis by _angle, written with btVector3
// operators: the component of v0 along wAxis is kept, the remainder is scaled
// by cos(_angle), and wAxis x v0 is scaled by sin(_angle).
// The result overwrites v0, which is also returned.
static inline
btVector3&
v3rotate_ref(
    btVector3& v0,
    btVector3& wAxis,
    const btScalar& _angle)
{
    const btVector3 alongAxis = wAxis * wAxis.dot( v0 );
    const btVector3 offAxis   = v0 - alongAxis;
    const btVector3 sideways  = wAxis.cross( v0 );

    v0 = alongAxis + offAxis * cosf( _angle ) + sideways * sinf( _angle );
    return v0;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3rotate.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3rotate_h
#define BulletTest_Test_v3rotate_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3rotate(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,181 @@
//
// Test_v3sdiv.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3sdiv.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3& v3sdiv_ref(
btVector3& v,
const btScalar& s);
#define LOOPCOUNT 2048
#define NUM_CYCLES 1000
// Checks btVector3::operator/=(scalar) against the scalar reference
// v3sdiv_ref, then times both over LOOPCOUNT elements for NUM_CYCLES passes.
// Returns 0 on success and 1 when the x/y/z results disagree by more than
// 4 * FLT_EPSILON (summed) or when the difference is NaN.
int Test_v3sdiv(void)
{
    btVector3 v1, v2;
    btScalar s;

    float x,y,z,w;

    // Init the data
    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    w = BT_NAN;     // w channel NaN

    v1.setValue(x,y,z);
    v1.setW(w);

    v2.setValue(x,y,z);
    v2.setW(w);

    s = (float) RANDF_16;

    btVector3 correct_res, test_res;

    {
        float vNaN = BT_NAN;
        correct_res.setValue(vNaN, vNaN, vNaN);
        test_res.setValue(vNaN, vNaN, vNaN);
        correct_res = v3sdiv_ref(v1, s);
        test_res = (v2 /= s);

        // Written as !(err <= tol) rather than (err > tol): every ordered
        // comparison with NaN is false, so the original form silently passed
        // whenever a NaN (e.g. leaked from the w lane) reached x, y or z.
        if( !( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
               fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
               fabs(correct_res.m_floats[2] - test_res.m_floats[2]) <= FLT_EPSILON * 4 ) )
        {
            vlog( "Error - v3sdiv result error! "
                  "\ncorrect = (%10.4f, %10.4f, %10.4f) "
                  "\ntested = (%10.4f, %10.4f, %10.4f) \n",
                  correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
                  test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);

            return 1;
        }
    }

#define DATA_SIZE LOOPCOUNT

    btVector3 vec3_arr[DATA_SIZE];
    btScalar s_arr[DATA_SIZE];

    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j, k;

    // Time the scalar reference. The data is regenerated before every pass
    // because the division overwrites vec3_arr in place.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = uint64_t(-1LL);  // largest uint64_t, so the first pass always wins
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            for( k = 0; k < DATA_SIZE; k++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr[k].setValue(x,y,z);
                vec3_arr[k].setW(w);

                s_arr[k] = RANDF_01;
            }

            startTime = ReadTicks();
            for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
            {
                v3sdiv_ref( vec3_arr[k], s_arr[k]);
                v3sdiv_ref( vec3_arr[k+1], s_arr[k+1]);
                v3sdiv_ref( vec3_arr[k+2], s_arr[k+2]);
                v3sdiv_ref( vec3_arr[k+3], s_arr[k+3]);
            }
            currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;
        else
            scalarTime /= NUM_CYCLES;
    }

    // Time btVector3::operator/= with the same setup.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = uint64_t(-1LL);
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            for( k = 0; k < DATA_SIZE; k++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr[k].setValue(x,y,z);
                vec3_arr[k].setW(w);

                s_arr[k] = RANDF_01;
            }

            startTime = ReadTicks();
            for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
            {
                vec3_arr[k] /= s_arr[k];
                vec3_arr[k+1] /= s_arr[k+1];
                vec3_arr[k+2] /= s_arr[k+2];
                vec3_arr[k+3] /= s_arr[k+3];
            }
            currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;
        else
            vectorTime /= NUM_CYCLES;
    }

    vlog( "Timing:\n" );
    vlog( " \t scalar\t vector\n" );
    vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
          TicksToCycles( vectorTime ) / LOOPCOUNT );

    return 0;
}
// Scalar reference for btVector3 /= scalar: multiplies x, y and z of v by 1/s
// in place and returns v. The fourth float of v is left untouched.
static inline btVector3& v3sdiv_ref(
    btVector3& v,
    const btScalar& s)
{
    const btScalar inverse = btScalar(1.0) / s;

    for( int axis = 0; axis < 3; ++axis )
        v.m_floats[axis] *= inverse;

    return v;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3sdiv.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3sdiv_h
#define BulletTest_Test_v3sdiv_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3sdiv(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,197 @@
//
// Test_v3skew.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3skew.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static void
v3skew_ref(
const btVector3* v,
btVector3* v1,
btVector3* v2,
btVector3* v3);
#define LOOPCOUNT 2048
#define NUM_CYCLES 10000
// Checks btVector3::getSkewSymmetricMatrix against the scalar reference
// v3skew_ref, then times both implementations.
// Returns 0 on success, 1 as soon as one of the three output rows differs.
int Test_v3skew(void)
{
btVector3 v, v1, v2, v3, vt1, vt2, vt3;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v.setValue(x,y,z);
v.setW(w);
// Pre-fill every output vector with NaN in all four lanes before the calls.
v1.setValue(w,w,w);
v1.setW(w);
vt3 = vt2 = vt1 = v3 = v2 = v1;
{
// v1..v3 receive the reference rows, vt1..vt3 the rows from the tested method.
v3skew_ref(&v, &v1, &v2, &v3);
v.getSkewSymmetricMatrix(&vt1, &vt2, &vt3);
/*
if( v1.m_floats[0] != vt1.m_floats[0] ||
v1.m_floats[1] != vt1.m_floats[1] ||
v1.m_floats[2] != vt1.m_floats[2] )
*/
// Rows are compared with btVector3::operator== instead of the per-component
// test kept in the comment above.
if(!(v1 == vt1))
{
vlog( "Error - v3skew result error! "
"\ncorrect v1 = (%10.4f, %10.4f, %10.4f) "
"\ntested v1 = (%10.4f, %10.4f, %10.4f) \n",
v1.m_floats[0], v1.m_floats[1], v1.m_floats[2],
vt1.m_floats[0], vt1.m_floats[1], vt1.m_floats[2]);
return 1;
}
/*
if( v2.m_floats[0] != vt2.m_floats[0] ||
v2.m_floats[1] != vt2.m_floats[1] ||
v2.m_floats[2] != vt2.m_floats[2] )
*/
if(!(v2 == vt2))
{
vlog( "Error - v3skew result error! "
"\ncorrect v2 = (%10.4f, %10.4f, %10.4f) "
"\ntested v2 = (%10.4f, %10.4f, %10.4f) \n",
v2.m_floats[0], v2.m_floats[1], v2.m_floats[2],
vt2.m_floats[0], vt2.m_floats[1], vt2.m_floats[2]);
return 1;
}
/*
if( v3.m_floats[0] != vt3.m_floats[0] ||
v3.m_floats[1] != vt3.m_floats[1] ||
v3.m_floats[2] != vt3.m_floats[2] )
*/
if(!(v3 == vt3))
{
vlog( "Error - v3skew result error! "
"\ncorrect v3 = (%10.4f, %10.4f, %10.4f) "
"\ntested v3 = (%10.4f, %10.4f, %10.4f) \n",
v3.m_floats[0], v3.m_floats[1], v3.m_floats[2],
vt3.m_floats[0], vt3.m_floats[1], vt3.m_floats[2]);
return 1;
}
}
// Timing data: DATA_SIZE (256) inputs, reused by the LOOPCOUNT (2048) inner
// iterations through the (k & (DATA_SIZE-1)) mask below.
#define DATA_SIZE 256
btVector3 v3_arr0[DATA_SIZE];
btVector3 v3_arr1[DATA_SIZE];
btVector3 v3_arr2[DATA_SIZE];
btVector3 v3_arr3[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// v3_arr0 holds the random inputs; v3_arr1..3 are output rows, pre-filled with NaN.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3_arr0[k].setValue(x,y,z);
v3_arr0[k].setW(w);
v3_arr1[k].setValue(w,w,w);
v3_arr1[k].setW(w);
v3_arr3[k] = v3_arr2[k] = v3_arr1[k];
}
// Time the scalar reference.
{
uint64_t startTime, bestTime, currentTime;
// -1LL converts to the largest uint64_t, so the first pass always becomes the best.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
// Wrap the loop counter onto the DATA_SIZE-entry arrays.
size_t k32 = (k & (DATA_SIZE-1));
v3skew_ref( &v3_arr0[k32], &v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report either the fastest pass or the mean over NUM_CYCLES passes.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Time btVector3::getSkewSymmetricMatrix with the same loop structure.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
size_t k32 = (k & (DATA_SIZE -1));
v3_arr0[k32].getSkewSymmetricMatrix(&v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Per-iteration cost: the chosen time divided by LOOPCOUNT.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
static void
v3skew_ref(
const btVector3* v,
btVector3* v1,
btVector3* v2,
btVector3* v3)
{
v1->setValue(0. ,-v->z(),v->y());
v2->setValue(v->z() ,0. ,-v->x());
v3->setValue(-v->y(),v->x() ,0.);
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3skew.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3skew_h
#define BulletTest_Test_v3skew_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3skew(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,180 @@
//
// Test_v3triple.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3triple.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static btScalar
v3triple_ref(
const btVector3& v,
const btVector3& v1,
const btVector3& v2);
#define LOOPCOUNT 1024
#define NUM_CYCLES 10000
// Checks btVector3::triple against the scalar reference v3triple_ref, then
// times both implementations.
// Returns 0 on success and 1 when the two results differ by more than
// 4 * FLT_EPSILON or when the difference is NaN.
int Test_v3triple(void)
{
    btVector3 v1, v2, v3;

    float x,y,z,w;

    // Init the data
    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    w = BT_NAN;     // w channel NaN
    v1.setValue(x,y,z);
    v1.setW(w);

    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    v2.setValue(x,y,z);
    v2.setW(w);

    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    v3.setValue(x,y,z);
    v3.setW(w);

    float correctTriple0, testTriple0;

    {
        correctTriple0 = w;
        testTriple0 = w;
        // The reference value is the "correct" one and the btVector3 method is
        // the one under test (the two assignments used to be swapped).
        correctTriple0 = v3triple_ref(v1,v2,v3);
        testTriple0 = v1.triple(v2, v3);

        // !(err <= tol) instead of (err > tol): comparisons with NaN are
        // always false, so a NaN result (e.g. leaked from the w lane) used to
        // pass unnoticed.
        if( !( fabsf(correctTriple0 - testTriple0) <= FLT_EPSILON * 4 ) )
        {
            vlog( "Error - v3triple result error! %f != %f \n", correctTriple0, testTriple0);

            return 1;
        }
    }

#define DATA_SIZE 1024

    btVector3 v3_arr1[DATA_SIZE];
    btVector3 v3_arr2[DATA_SIZE];
    btVector3 v3_arr3[DATA_SIZE];
    btScalar res_arr[DATA_SIZE];

    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j, k;

    // Random inputs for the timing loops; they are read-only there, so one
    // fill serves every pass.
    for( k = 0; k < DATA_SIZE; k++ )
    {
        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v3_arr1[k].setValue(x,y,z);
        v3_arr1[k].setW(w);

        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v3_arr2[k].setValue(x,y,z);
        v3_arr2[k].setW(w);

        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v3_arr3[k].setValue(x,y,z);
        v3_arr3[k].setW(w);
    }

    // Time the scalar reference, four elements per iteration.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = uint64_t(-1LL);  // largest uint64_t, so the first pass always wins
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            startTime = ReadTicks();
            for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
            {
                size_t k32 = (k & (DATA_SIZE-1));
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]);
            }
            currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;
        else
            scalarTime /= NUM_CYCLES;
    }

    // Time btVector3::triple with the same loop structure.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = uint64_t(-1LL);
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            startTime = ReadTicks();
            for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
            {
                size_t k32 = k & (DATA_SIZE -1);
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]);
            }
            currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;
        else
            vectorTime /= NUM_CYCLES;
    }

    vlog( "Timing:\n" );
    vlog( " \t scalar\t vector\n" );
    vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );

    return 0;
}
// Scalar reference for btVector3::triple: returns v . (v1 x v2), computed from
// the first three floats of each vector only.
static btScalar v3triple_ref(
    const btVector3& v,
    const btVector3& v1,
    const btVector3& v2)
{
    const btScalar* a = v.m_floats;
    const btScalar* b = v1.m_floats;
    const btScalar* c = v2.m_floats;

    // Terms are kept in a single expression, in x, y, z order, so the
    // floating-point evaluation matches the component-wise formula exactly.
    return a[0] * (b[1] * c[2] - b[2] * c[1]) +
           a[1] * (b[2] * c[0] - b[0] * c[2]) +
           a[2] * (b[0] * c[1] - b[1] * c[0]);
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3triple.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3triple_h
#define BulletTest_Test_v3triple_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3triple(void);
#ifdef __cplusplus
}
#endif
#endif

272
Test/Source/Utils.cpp Normal file
View File

@@ -0,0 +1,272 @@
//
// Utils.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include <stdio.h>
#ifdef __APPLE__
#include <mach/mach_time.h>
#include <sys/sysctl.h>
#include <sys/mman.h>
#include <errno.h>
#else
#include "LinearMath/btAlignedAllocator.h"
#endif //__APPLE__
#include <stdlib.h>
#include "Utils.h"
#pragma mark Timing
int gReportNanoseconds = 0;
#ifdef _WIN32
#include <intrin.h>
// Windows timer source: the raw value returned by the __rdtsc() intrinsic.
uint64_t ReadTicks( void )
{
    const uint64_t now = __rdtsc();
    return now;
}
// Windows build: ReadTicks() values are reported unchanged as cycles.
double TicksToCycles( uint64_t delta )
{
    return static_cast<double>( delta );
}
// Windows build: returns the tick delta converted to double, with no scaling.
// NOTE(review): despite the name this is not a value in seconds; callers that
// print it with an "s" suffix are showing raw ticks. Confirm whether a
// frequency-based conversion is wanted here.
double TicksToSeconds( uint64_t delta )
{
    return static_cast<double>( delta );
}
// Windows stand-in for the guard-page allocator: one 16-byte-aligned block of
// count * size bytes from btAlignedAlloc, with no guard pages. The reported
// object stride is simply size.
// NOTE(review): nothing here zeroes the block or checks count * size for
// overflow - confirm callers do not rely on calloc-like behaviour.
void *GuardCalloc( size_t count, size_t size, size_t *objectStride )
{
    if( NULL != objectStride )
        *objectStride = size;

    const size_t totalBytes = count * size;
    return (void*) btAlignedAlloc(totalBytes, 16);
}
// Windows counterpart of GuardCalloc: hands the block back to btAlignedFree.
void GuardFree( void *buf )
{
    void *block = buf;
    btAlignedFree(block);
}
#endif
#ifdef __APPLE__
// Apple timer source: the current mach_absolute_time() value.
uint64_t ReadTicks( void )
{
    const uint64_t now = mach_absolute_time();
    return now;
}
double TicksToCycles( uint64_t delta )
{
static long double conversion = 0.0L;
if( 0.0L == conversion )
{
// attempt to get conversion to nanoseconds
mach_timebase_info_data_t info;
int err = mach_timebase_info( &info );
if( err )
return __builtin_nanf("");
conversion = (long double) info.numer / info.denom;
// attempt to get conversion to cycles
if( 0 == gReportNanoseconds )
{
uint64_t frequency = 0;
size_t freq_size = sizeof( frequency );
err = sysctlbyname( "hw.cpufrequency_max", &frequency, &freq_size, NULL, 0 );
if( err || 0 == frequency )
vlog( "Failed to get max cpu frequency. Reporting times as nanoseconds.\n" );
else
{
conversion *= 1e-9L /* sec / ns */ * frequency /* cycles / sec */;
vlog( "Reporting times as cycles. (%2.2f MHz)\n", 1e-6 * frequency );
}
}
else
vlog( "Reporting times as nanoseconds.\n" );
}
return (double) (delta * conversion);
}
double TicksToSeconds( uint64_t delta )
{
static long double conversion = 0.0L;
if( 0.0L == conversion )
{
// attempt to get conversion to nanoseconds
mach_timebase_info_data_t info;
int err = mach_timebase_info( &info );
if( err )
return __builtin_nanf("");
conversion = info.numer / (1e9L * info.denom);
}
return (double) (delta * conversion);
}
#pragma mark -
#pragma mark GuardCalloc
#define kPageSize 4096
// Bookkeeping record that GuardCalloc writes immediately before the first
// user byte (i.e. at the end of the leading guard page) so that GuardFree and
// GuardMarkBuffer can recover the layout from the user pointer alone.
typedef struct BufInfo
{
void *head; // start of the whole mmap'd region
size_t count; // number of objects requested from GuardCalloc
size_t stride; // object size rounded up to a multiple of kPageSize
size_t totalSize; // length of the mapping, as passed to mmap/munmap
}BufInfo;
static int GuardMarkBuffer( void *buffer, int flag );
// Allocates count objects of at least size bytes each, every object followed
// by an inaccessible guard page, plus one guard page in front of the first.
//   objectStride - optional; receives the distance in bytes from one object
//                  to the next (0 on failure).
// Returns a pointer to the first object, or NULL on failure (a message is
// logged). Release the buffer with GuardFree.
// NOTE(review): kPageSize is hard-coded to 4096; mprotect needs page-aligned
// addresses, so confirm this matches the page size of every target.
void *GuardCalloc( size_t count, size_t size, size_t *objectStride )
{
    if( objectStride )
        *objectStride = 0;

    // Round size up to a multiple of a page size
    size_t stride = (size + kPageSize - 1) & -kPageSize;

    // Reject requests whose size arithmetic would wrap around
    size_t slot = stride + kPageSize;
    if( stride < size || slot < stride || count > ((size_t) -1 - kPageSize) / slot )
    {
        vlog( "GuardCalloc: requested size is too large\n" );
        return NULL;
    }

    //Calculate total size of the allocation
    size_t totalSize = count * slot + kPageSize;

    // Allocate. Anonymous mappings take fd -1; there is no backing file.
    char *buf = (char*)mmap( NULL,
                             totalSize,
                             PROT_READ | PROT_WRITE,
                             MAP_ANON | MAP_SHARED,
                             -1, 0 );
    if( MAP_FAILED == buf )
    {
        vlog( "mmap failed: %d\n", errno );
        return NULL;
    }

    // Find the first byte of user data
    char *result = buf + kPageSize;

    // Record what we did for posterity
    BufInfo *bptr = (BufInfo*) result - 1;
    bptr->head = buf;
    bptr->count = count;
    bptr->stride = stride;
    bptr->totalSize = totalSize;

    // Place the first guard page. Masks our record above.
    if( mprotect(buf, kPageSize, PROT_NONE) )
    {
        int err = errno;    // capture before munmap can overwrite it
        munmap( buf, totalSize);
        vlog( "mprotect -1 failed: %d\n", err );
        return NULL;
    }

    // Place the rest of the guard pages
    size_t i;
    char *p = result;
    for( i = 0; i < count; i++ )
    {
        p += stride;
        if( mprotect(p, kPageSize, PROT_NONE) )
        {
            int err = errno;
            munmap( buf, totalSize);
            vlog( "mprotect %lu failed: %d\n", (unsigned long) i, err );
            return NULL;
        }
        p += kPageSize;
    }

    // record the stride from object to object
    if( objectStride )
        *objectStride = slot;

    // return pointer to first object
    return result;
}
void GuardFree( void *buf )
{
if( mprotect((char*)buf - kPageSize, kPageSize, PROT_READ) )
{
vlog( "Unable to read buf info. GuardFree failed! %p (%d)\n", buf, errno );
return;
}
BufInfo *bptr = (BufInfo*) buf - 1;
if( munmap( bptr->head, bptr->totalSize ) )
vlog( "Unable to unmap data. GuardFree failed! %p (%d)\n", buf, errno );
}
// Re-protects a GuardCalloc buffer as PROT_READ. Returns 0 on success,
// otherwise the error code from GuardMarkBuffer.
int GuardMarkReadOnly( void *buf )
{
    const int protection = PROT_READ;
    return GuardMarkBuffer(buf, protection);
}
// Re-protects a GuardCalloc buffer as PROT_READ | PROT_WRITE. Returns 0 on
// success, otherwise the error code from GuardMarkBuffer.
int GuardMarkReadWrite( void *buf)
{
    const int protection = PROT_READ | PROT_WRITE;
    return GuardMarkBuffer(buf, protection);
}
// Re-protects a GuardCalloc buffer as PROT_WRITE. Returns 0 on success,
// otherwise the error code from GuardMarkBuffer.
int GuardMarkWriteOnly( void *buf)
{
    const int protection = PROT_WRITE;
    return GuardMarkBuffer(buf, protection);
}
static int GuardMarkBuffer( void *buf, int flag )
{
if( mprotect((char*)buf - kPageSize, kPageSize, PROT_READ) )
{
vlog( "Unable to read buf info. GuardMarkBuffer %d failed! %p (%d)\n", flag, buf, errno );
return errno;
}
BufInfo *bptr = (BufInfo*) buf - 1;
size_t count = bptr->count;
size_t stride = bptr->stride;
size_t i;
for( i = 0; i < count; i++ )
{
if( mprotect(buf, stride, flag) )
{
vlog( "Unable to protect segment %ld. GuardMarkBuffer %d failed! %p (%d)\n", i, flag, buf, errno );
return errno;
}
bptr += stride + kPageSize;
}
if( mprotect((char*)buf - kPageSize, kPageSize, PROT_NONE) )
{
vlog( "Unable to protect leading guard page. GuardMarkBuffer %d failed! %p (%d)\n", flag, buf, errno );
return errno;
}
return 0;
}
#endif
// Builds a 32-bit value from two consecutive rand() results: the first call
// is shifted into the upper half and XORed with the second.
// The calls are made in separate statements because the operands of ^ may be
// evaluated in either order, which made the sequence produced for a given
// srand() seed depend on the compiler.
// NOTE(review): where RAND_MAX is only 0x7fff, bits 15 and 31 of the result
// are always zero.
uint32_t random_number32(void)
{
    const uint32_t high = (uint32_t) rand();
    const uint32_t low = (uint32_t) rand();

    return (high << 16) ^ low;
}
// Builds a 64-bit value from four consecutive rand() results, XORed together
// at bit offsets 48, 32, 16 and 0 in call order.
// The calls are made in separate statements because the operands of ^ may be
// evaluated in any order, which made the sequence produced for a given
// srand() seed depend on the compiler.
uint64_t random_number64(void)
{
    const uint64_t part48 = (uint64_t) rand();
    const uint64_t part32 = (uint64_t) rand();
    const uint64_t part16 = (uint64_t) rand();
    const uint64_t part0 = (uint64_t) rand();

    return (part48 << 48) ^ (part32 << 32) ^ (part16 << 16) ^ part0;
}

72
Test/Source/Utils.h Normal file
View File

@@ -0,0 +1,72 @@
//
// Utils.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Utils_h
#define BulletTest_Utils_h
#include "btIntDefines.h"
#include <stddef.h>
#include <stdio.h>
// Random float helpers built on random_number32().
//   LARGE_FLOAT17  2^17
//   RANDF_16       random_number32() scaled by 2^-16
//   RANDF_01       random_number32() scaled by 2^-32
//   RANDF          random_number32() scaled by 2^-8
//   RANDF_m1p1     2 * (random_number32() scaled by 2^-32) - 1
// The _WIN32 branch spells the powers of two with powf() instead of hex float
// literals; users of those macros need <math.h>.
#ifdef _WIN32
#define LARGE_FLOAT17 (1.f * powf(2,17))
#define RANDF_16 (random_number32() * powf(2,-16))
#define RANDF_01 ( random_number32() * powf(2,-32) )
#define RANDF ( random_number32() * powf(2,-8) )
// Same grouping as the non-Windows definition: scale first, then 2*x - 1.
// (The closing parenthesis used to sit after the "-1.0f", giving 2*(x - 1).)
#define RANDF_m1p1 (2.0f*( random_number32() * powf(2,-32) )-1.0f)
#else
#define LARGE_FLOAT17 (0x1.0p17f)
#define RANDF_16 (random_number32() * 0x1.0p-16f)
#define RANDF_01 ( random_number32() * 0x1.0p-32f )
#define RANDF ( random_number32() * 0x1.0p-8f )
#define RANDF_m1p1 (2.0f*( random_number32() * 0x1.0p-32f )-1.0f)
#endif//_WIN32
#ifdef __cplusplus
extern "C" {
#endif
/*********************
* Timing *
*********************/
extern int gReportNanoseconds;
uint64_t ReadTicks( void );
double TicksToCycles( uint64_t delta ); // Performance data should be reported in cycles most of the time.
double TicksToSeconds( uint64_t delta );
/*********************
* Guard Heap *
*********************/
// Return a buffer containing count objects of size size, with guard pages in between.
// The stride between one object and the next is given by objectStride.
// objectStride may be NULL. Objects so created are freed with GuardFree
void *GuardCalloc( size_t count, size_t size, size_t *objectStride );
void GuardFree( void * );
// mark the contents of a guard buffer read-only or write-only. Return 0 on success.
int GuardMarkReadOnly( void *);
int GuardMarkWriteOnly( void *);
int GuardMarkReadWrite( void *);
/*********************
* Printing *
*********************/
#define vlog( ... ) printf( __VA_ARGS__ )
uint32_t random_number32(void);
uint64_t random_number64(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,19 @@
#ifndef BT_INT_DEFINES_H
#define BT_INT_DEFINES_H
#ifdef __GNUC__
#include <stdint.h>
#elif defined(_MSC_VER)
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
#else
typedef int int32_t;
typedef long long int int64_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;
#endif
#endif //BT_INT_DEFINES_H

326
Test/Source/main.cpp Normal file
View File

@@ -0,0 +1,326 @@
//
// main.c
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include <stdio.h>
#ifdef __APPLE__
#include <libgen.h>
#endif //__APPLE__
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include "main.h"
#include "Utils.h"
#include "TestList.h"
#include "LinearMath/btScalar.h"
#if defined (BT_USE_NEON) || defined (BT_USE_SSE_IN_API)
#ifdef _WIN32
#define strcasecmp _stricmp
#define basename(A) A
#endif
#define EXIT_NO_ERROR INT_MIN
//int gReportNanoseconds = 0; // in Utils.c
int gReportAverageTimes = 0;
int gExitOnError = 0;
char *gFullPath = NULL;
const char *gAppName = NULL;
int gArgc;
const char **gArgv;
// Singly linked list node naming one test requested on the command line.
typedef struct TestNode
{
struct TestNode *next; // following node, or NULL at the end of the list
const char *name; // test name; ParseArgs points this at an argv entry, so it is not owned by the node
}TestNode;
TestNode *gNodeList = NULL;
static int ParseArgs( int argc, const char *argv[] );
static void PrintUsage( void );
static int Init( void );
static void ListTests(void );
const char *gArch =
#ifdef __i386__
"i386";
#elif defined __x86_64__
"x86_64";
#elif defined __arm__
"arm";
#elif defined _WIN64
"win64";
#elif defined _WIN32
"win32";
#else
#error unknown arch
#endif
#include <stdio.h>
// Runs gTestList[index]: prints the banner, times the test and prints its
// pass/fail line, bumping *numPassed or *numFailed accordingly.
// Returns the test's own result (0 means it passed).
static int RunTestAtIndex( size_t index, int *numPassed, int *numFailed )
{
    printf( "\n----------------------------------------------\n" );
    printf( "Testing %s:\n", gTestList[index].name );
    printf( "----------------------------------------------\n" );

    uint64_t startTime = ReadTicks();
    int local_error = gTestList[index].test_func();
    uint64_t currentTime = ReadTicks() - startTime;

    if( local_error )
    {
        (*numFailed)++;
        printf( "*** %s test failed with error: %d\n", gTestList[index].name, local_error );
    }
    else
    {
        (*numPassed)++;
        printf("%s Passed.\t\t\t(%2.2gs)\n", gTestList[index].name, TicksToSeconds(currentTime));
    }
    return local_error;
}

// Entry point of the test runner. Parses the command line, then runs either
// the tests named on it or, when none are named, every entry of gTestList.
// Returns 0 when everything passed, the first failing test's error code
// otherwise, or -1 when a requested test name is unknown.
int main (int argc, const char * argv[])
{
    // To enable just one test programmatically (instead of through a
    // command-line parameter), put a single node on gNodeList here:
    // TestNode *node = (TestNode*) malloc( sizeof( TestNode ) );
    // node->name = "btDbvt";
    // node->next = 0;
    // gNodeList = node;

    // Fixed seed so that every run sees the same random test data
    srand(0);

    int numPassedTests=0;
    int numFailedTests= 0;

    int err;

    // Parse arguments. Build gNodeList.
    if( (err = ParseArgs( argc, argv ) ) )
    {
        if( EXIT_NO_ERROR == err )
            return 0;

        PrintUsage();
        return err;
    }

    printf("Arch: %s\n", gArch );

    if( gReportAverageTimes )
        printf( "Reporting average times.\n" );
    else
        printf( "Reporting best times.\n" );

    // Set a few things up
    if( (err = Init() ))
    {
        printf( "Init failed.\n" );
        return err;
    }

    if( NULL == gNodeList )
    { // test everything
        printf( "No function list found. Testing everything...\n" );

        size_t i;
        for( i = 0; NULL != gTestList[i].test_func; i++ )
        {
            int local_error = RunTestAtIndex( i, &numPassedTests, &numFailedTests );
            if( local_error )
            {
                if( gExitOnError )
                    return local_error;
                if( 0 == err )
                    err = local_error;
            }
        }
    }
    else
    { // test just the list
        while( NULL != gNodeList )
        {
            TestNode *currentNode = gNodeList;
            gNodeList = gNodeList->next;

            // Find the test with that name
            size_t i;
            for( i = 0; NULL != gTestList[i].test_func; i++ )
                if( 0 == strcasecmp( currentNode->name, gTestList[i].name ) )
                    break;

            if( NULL != gTestList[i].test_func )
            {
                int local_error = RunTestAtIndex( i, &numPassedTests, &numFailedTests );
                if( local_error )
                {
                    if( gExitOnError )
                    {
                        free( currentNode );
                        return local_error;
                    }
                    if( 0 == err )
                        err = local_error;
                }
            }
            else
            {
                printf( "\n***Error: Test name \"%s\" not found! Skipping.\n", currentNode->name );
                err = -1;
                if( gExitOnError )
                {
                    free( currentNode );
                    return -1;
                }
            }

            free( currentNode );
        }
    }

    printf( "\n----------------------------------------------\n" );
    printf("numPassedTests = %d, numFailedTests = %d\n",numPassedTests,numFailedTests);

    free(gFullPath);
    return err;
}
// One-time setup performed before any test runs. Always returns 0.
static int Init( void )
{
    // init the timer: the result of this first conversion is not needed
    const uint64_t noTicks = 0;
    (void) TicksToCycles( noTicks );

    return 0;
}
static int ParseArgs( int argc, const char *argv[] )
{
int listTests = 0;
TestNode *list = NULL;
gArgc = argc;
gArgv = argv;
gFullPath = (char*)malloc( strlen(argv[0]) + 1);
strcpy(gFullPath, argv[0]);
gAppName = basename( gFullPath );
if( NULL == gAppName )
gAppName = "<unknown app name>";
printf( "%s ", gAppName );
int skipremaining=0;
size_t i;
for( i = 1; i < argc; i++ )
{
const char *arg = argv[i];
printf( "\t%s", arg );
if( arg[0] == '-' )
{
arg++;
while( arg[0] != '\0' )
{
int stop = 0;
switch( arg[0] )
{
case 'a':
gReportAverageTimes ^= 1;
break;
case 'e':
gExitOnError ^= 1;
break;
case 'h':
PrintUsage();
return EXIT_NO_ERROR;
case 'l':
listTests ^= 1;
return EXIT_NO_ERROR;
case 's':
gReportNanoseconds ^= 1;
break;
case ' ':
stop = 1;
break;
case 'N'://ignore the -NSDocumentRevisionsDebugMode argument from XCode 4.3.2
skipremaining = 1;
stop = 1;
break;
default:
printf( "\nError: Unknown flag \'%c\'\n", arg[0] );
return -1;
}
if( stop )
break;
arg++;
}
}
else
{ // add function name to the list
TestNode *node = (TestNode*) malloc( sizeof( TestNode ) );
node->name = arg;
node->next = list;
list = node;
}
if (skipremaining)
break;
}
// reverse the list of test names, and stick on gNodeList
while( list )
{
TestNode *node = list;
TestNode *next = node->next;
node->next = gNodeList;
gNodeList = node;
list = next;
}
printf( "\n" );
if( listTests )
ListTests();
return 0;
}
// Prints the command-line help text to stdout.
static void PrintUsage( void )
{
    printf("\nUsage:\n" );
    // Newline added so that "Options:" starts on its own line
    printf("%s: <-aehls> <test names>\n", gAppName);
    printf("Options:\n");
    printf("\t-a\tToggle report average times vs. best times. (Default: best times)\n");
    printf("\t-e\tToggle exit immediately on error behavior. (Default: off)\n");
    printf("\t-h\tPrint this message.\n");
    printf("\t-l\tToggle list available test names. (Default: off)\n");
    printf("\t-s\tToggle report times in cycles or nanoseconds. (Default: cycles)\n\n");
    printf("\tOptions may be followed by one or more test names. If no test names \n" );
    printf("\tare provided, then all tests are run.\n\n");
}
// Prints the name of every entry of gTestList, four per line, separated by
// commas. The list ends at the first entry whose test_func is NULL.
static void ListTests(void )
{
    size_t i;

    printf("\nTests:\n");
    for( i = 0; NULL != gTestList[i].test_func; i++ )
    {
        printf( "%19s", gTestList[i].name );
        // Comma only when another test follows. Entry i is a real test here,
        // so entry i+1 exists (at worst it is the NULL terminator).
        if( NULL != gTestList[i+1].test_func )
            printf( "," );
        if( 3 == (i&3) )
            printf( "\n" );
    }

    // Finish a partially filled last row
    if( 0 != (i&3) )
        printf( "\n" );
}
#else
#include <stdio.h>
// Fallback entry point used when neither BT_USE_NEON nor BT_USE_SSE_IN_API is
// defined: explains why no tests were built and exits with status 0.
int main(int argc, char* argv[])
{
    (void) argc;
    (void) argv;

    const char *notice = "error: no SIMD enabled through BT_USE_NEON or BT_USE_SSE_IN_API \n(enable in LinearMath/btScalar.h or through build system)\n";
    printf( "%s", notice );

    return 0;
}
#endif

25
Test/Source/main.h Normal file
View File

@@ -0,0 +1,25 @@
//
// main.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_main_h
#define BulletTest_main_h
#ifdef __cplusplus
extern "C" {
#endif
extern int gReportAverageTimes; // if 0, report best times
extern int gExitOnError; // if non-zero, exit as soon as an error is encountered
extern const char *gAppName; // the name of this application
#ifdef __cplusplus
}
#endif
#endif

70
Test/Source/vector.h Normal file
View File

@@ -0,0 +1,70 @@
//
// vector.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_vector_h
#define BulletTest_vector_h
#ifdef __SSE__
typedef float float4 __attribute__ ((__vector_size__(16)));
#include <xmmintrin.h>
#endif
#ifdef __SSE2__
typedef double double2 __attribute__ ((__vector_size__(16)));
typedef char char16 __attribute__ ((__vector_size__(16)));
typedef unsigned char uchar16 __attribute__ ((__vector_size__(16)));
typedef short short8 __attribute__ ((__vector_size__(16)));
typedef unsigned short ushort8 __attribute__ ((__vector_size__(16)));
typedef int int4 __attribute__ ((__vector_size__(16)));
// typedef unsigned int uint4 __attribute__ ((__vector_size__(16)));
#ifdef __LP64__
typedef long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long ulong2 __attribute__ ((__vector_size__(16)));
#else
typedef long long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long long ulong2 __attribute__ ((__vector_size__(16)));
#endif
#include <emmintrin.h>
#endif
#ifdef __SSE3__
#include <pmmintrin.h>
#endif
#ifdef __SSSE3__
#include <tmmintrin.h>
#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
#ifdef __arm__
#include <arm/arch.h>
#ifdef _ARM_ARCH_7
#define ARM_NEON_GCC_COMPATIBILITY 1
#include <arm_neon.h>
typedef float float4 __attribute__ ((__vector_size__(16)));
typedef double double2 __attribute__ ((__vector_size__(16)));
typedef char char16 __attribute__ ((__vector_size__(16)));
typedef unsigned char uchar16 __attribute__ ((__vector_size__(16)));
typedef short short8 __attribute__ ((__vector_size__(16)));
typedef unsigned short ushort8 __attribute__ ((__vector_size__(16)));
typedef int int4 __attribute__ ((__vector_size__(16)));
typedef unsigned int uint4 __attribute__ ((__vector_size__(16)));
#ifdef __LP64__
typedef long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long ulong2 __attribute__ ((__vector_size__(16)));
#else
typedef long long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long long ulong2 __attribute__ ((__vector_size__(16)));
#endif
#endif
#endif
#endif