Merge pull request #170 from erwincoumans/master

sync repos
This commit is contained in:
erwincoumans
2014-05-07 09:00:24 -07:00
172 changed files with 42949 additions and 0 deletions

47
test/Bullet2/Info.plist Normal file
View File

@@ -0,0 +1,47 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleDisplayName</key>
<string>${PRODUCT_NAME}</string>
<key>CFBundleExecutable</key>
<string>${EXECUTABLE_NAME}</string>
<key>CFBundleIconFiles</key>
<array/>
<key>CFBundleIdentifier</key>
<string>Apple.${PRODUCT_NAME:rfc1034identifier}</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>${PRODUCT_NAME}</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>1.0</string>
<key>LSRequiresIPhoneOS</key>
<true/>
<key>UIRequiredDeviceCapabilities</key>
<array>
<string>armv7</string>
</array>
<key>UISupportedInterfaceOrientations</key>
<array>
<string>UIInterfaceOrientationPortrait</string>
<string>UIInterfaceOrientationLandscapeLeft</string>
<string>UIInterfaceOrientationLandscapeRight</string>
</array>
<key>UISupportedInterfaceOrientations~ipad</key>
<array>
<string>UIInterfaceOrientationPortrait</string>
<string>UIInterfaceOrientationPortraitUpsideDown</string>
<string>UIInterfaceOrientationLandscapeLeft</string>
<string>UIInterfaceOrientationLandscapeRight</string>
</array>
</dict>
</plist>

28
test/Bullet2/README.txt Normal file
View File

@@ -0,0 +1,28 @@
1) Add a .cpp and .h file for your test function. The function should conform to:
#ifdef __cplusplus
extern "C" {
#endif
#include "Utils.h"
#include "main.h"
#include "vector.h"
// Your test function
int MyTestFunc(void);
#ifdef __cplusplus
}
#endif
The rest of the program doesn't care or know what you do in MyTestFunc, except that MyTestFunc should return non-zero in case of failure in MyTestFunc. There are some handy functions in Utils.h that you might want to use. Please use vlog instead of printf to print stuff, and random_number32/64() in place of rand(), so I can multithread later if it comes to that. There are some read-only globals that you may wish to respond to, declared in Utils.h:
gReportAverageTimes if you do timing, report times as averages instead of best times if non-zero
gExitOnError if non-zero, return non-zero immediately if you encounter an error
gAppName (const char*) the name of the application
As a convenience, vector.h declares some cross-platform vector types and includes the appropriate vector headers according to the compiler flags.
2) Add an entry to gTestList in TestList.cpp for your test function, so the rest of the app knows to call it

View File

@@ -0,0 +1,97 @@
//
// TestList.c
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include <stdlib.h>
#include "TestList.h"
#include "Test_qtmul.h"
#include "Test_qtmulQV3.h"
#include "Test_qtmulV3Q.h"
#include "Test_qtdot.h"
#include "Test_qtnorm.h"
#include "Test_v3dot.h"
#include "Test_v3sdiv.h"
#include "Test_v3norm.h"
#include "Test_v3cross.h"
#include "Test_v3triple.h"
#include "Test_v3interp.h"
#include "Test_v3lerp.h"
#include "Test_v3skew.h"
#include "Test_v3div.h"
#include "Test_v3rotate.h"
#include "Test_maxdot.h"
#include "Test_mindot.h"
#include "Test_dot3.h"
#include "Test_3x3transpose.h"
#include "Test_3x3transposeTimes.h"
#include "Test_3x3timesTranspose.h"
#include "Test_3x3mulM.h"
#include "Test_3x3mulM1M2.h"
#include "Test_3x3mulMV.h"
#include "Test_3x3mulVM.h"
#include "Test_3x3setRot.h"
#include "Test_3x3getRot.h"
#include "Test_btDbvt.h"
#include "Test_quat_aos_neon.h"
#include "LinearMath/btScalar.h"
#define ENTRY( _name, _func ) { _name, _func }
//
// Test functions have the form int (*TestFunc)( void )
// They return a non-zero result in case of failure.
//
// Please see handy stuff in Utils.h, vector.h when writing your test code.
//
#if defined (BT_USE_NEON) || defined (BT_USE_SSE_IN_API)
// Table of registered tests: each entry pairs a test name with the function
// that runs it. The list is terminated by a { NULL, NULL } sentinel.
TestDesc gTestList[] =
{
    ENTRY( "maxdot", Test_maxdot ),
    ENTRY( "mindot", Test_mindot ),
    ENTRY( "qtmul", Test_qtmul ),
    ENTRY( "qtmulQV3", Test_qtmulQV3 ),
    ENTRY( "qtmulV3Q", Test_qtmulV3Q ),
    ENTRY( "qtdot", Test_qtdot ),
    ENTRY( "qtnorm", Test_qtnorm ),
    ENTRY( "v3dot", Test_v3dot ),
    ENTRY( "v3sdiv", Test_v3sdiv ),
    ENTRY( "v3norm", Test_v3norm ),
    ENTRY( "v3cross", Test_v3cross ),
    ENTRY( "v3triple", Test_v3triple ),
    ENTRY( "v3interp", Test_v3interp ),
    ENTRY( "v3lerp", Test_v3lerp ),
    ENTRY( "v3skew", Test_v3skew ),
    ENTRY( "v3div", Test_v3div ),
    ENTRY( "v3rotate", Test_v3rotate ),
    ENTRY( "dot3", Test_dot3 ),
    ENTRY( "3x3transpose", Test_3x3transpose ),
    ENTRY( "3x3transposeTimes", Test_3x3transposeTimes ),
    ENTRY( "3x3timesTranspose", Test_3x3timesTranspose ),
    ENTRY( "3x3mulM", Test_3x3mulM ),
    ENTRY( "3x3mulM1M2", Test_3x3mulM1M2 ),
    ENTRY( "3x3mulMV", Test_3x3mulMV ),
    // Vector * matrix test; must be bound to Test_3x3mulVM, not to the
    // matrix * vector test registered on the line above.
    ENTRY( "3x3mulVM", Test_3x3mulVM ),
    ENTRY( "3x3setRot", Test_3x3setRot ),
    ENTRY( "3x3getRot", Test_3x3getRot ),
    ENTRY( "btDbvt", Test_btDbvt ),
    ENTRY( "quat_aos_neon", Test_quat_aos_neon ),
    { NULL, NULL }
};
#else
// Neither BT_USE_NEON nor BT_USE_SSE_IN_API is defined: the list holds only
// the terminating sentinel.
TestDesc gTestList[] = { { NULL, NULL } };
#endif

View File

@@ -0,0 +1,28 @@
//
// TestList.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_TestList_h
#define BulletTest_TestList_h
#ifdef __cplusplus
extern "C" {
#endif
/* One runnable test: a name paired with the function that performs it. */
struct TestDesc
{
    const char *name;          /* name of the test */
    int (*test_func)(void);    /* return 0 for success, non-zero for failure */
};
typedef struct TestDesc TestDesc;

/* Table of tests; the definition lives outside this header. */
extern TestDesc gTestList[];
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,158 @@
//
// Test_3x3getRot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3getRot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
// Produces a four-lane value whose first three lanes come from RANDF_m1p1
// and whose last (w) lane is BT_NAN.
static inline btSimdFloat4 rand_f4(void)
{
    const btSimdFloat4 lanes = btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN ); // w channel NaN
    return lanes;
}
// Produces a four-lane value with BT_NAN in every lane.
static inline btSimdFloat4 qtNAN_f4(void)
{
    const btSimdFloat4 allNaN = btAssign128( BT_NAN, BT_NAN, BT_NAN, BT_NAN );
    return allNaN;
}
static void M3x3getRot_ref( const btMatrix3x3 &m, btQuaternion &q )
{
btVector3 m_el[3] = { m[0], m[1], m[2] };
btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
btScalar temp[4];
if (trace > btScalar(0.0))
{
btScalar s = btSqrt(trace + btScalar(1.0));
temp[3]=(s * btScalar(0.5));
s = btScalar(0.5) / s;
temp[0]=((m_el[2].y() - m_el[1].z()) * s);
temp[1]=((m_el[0].z() - m_el[2].x()) * s);
temp[2]=((m_el[1].x() - m_el[0].y()) * s);
}
else
{
int i = m_el[0].x() < m_el[1].y() ?
(m_el[1].y() < m_el[2].z() ? 2 : 1) :
(m_el[0].x() < m_el[2].z() ? 2 : 0);
int j = (i + 1) % 3;
int k = (i + 2) % 3;
btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
temp[i] = s * btScalar(0.5);
s = btScalar(0.5) / s;
temp[3] = (m_el[k][j] - m_el[j][k]) * s;
temp[j] = (m_el[j][i] + m_el[i][j]) * s;
temp[k] = (m_el[k][i] + m_el[i][k]) * s;
}
q.setValue(temp[0],temp[1],temp[2],temp[3]);
}
// Tolerant inequality for quaternions: yields 1 when the summed absolute
// component differences exceed FLT_EPSILON * 4, otherwise 0.
static int operator!= ( const btQuaternion &a, const btQuaternion &b )
{
    const bool tooFar =
        ( fabs(a.x() - b.x()) +
          fabs(a.y() - b.y()) +
          fabs(a.z() - b.z()) +
          fabs(a.w() - b.w()) > FLT_EPSILON * 4 );

    return tooFar ? 1 : 0;
}
// Compares btMatrix3x3::getRotation() with M3x3getRot_ref on ARRAY_SIZE
// random matrices, then times both paths over LOOPCOUNT passes and logs the
// per-element result.
// Returns 0 on success, -1 on the first mismatch.
int Test_3x3getRot(void)
{
    // Plain local arrays; nothing in this function sets up guard pages.
    btMatrix3x3 in1[ARRAY_SIZE];
    btQuaternion out[ARRAY_SIZE];
    btQuaternion out2[ARRAY_SIZE];

    // Init the data and check every result as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        // Both outputs start as all-NaN before being overwritten
        out[i] = btQuaternion(qtNAN_f4());
        out2[i] = btQuaternion(qtNAN_f4());

        M3x3getRot_ref(in1[i], out[i]);
        in1[i].getRotation(out2[i]);

        if( out[i] != out2[i] )
        {
            vlog( "Error - M3x3getRot result error! ");
            // i is a size_t: cast so the argument really matches %ld
            vlog( "failure @ %ld\n", (long) i);
            vlog( "\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) \n",
                  out[i].x(), out[i].y(), out[i].z(), out[i].w(),
                  out2[i].x(), out2[i].y(), out2[i].z(), out2[i].w());
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the reference path. bestTime starts at the largest uint64_t value;
    // it is written without first reading the (uninitialised) variable.
    bestTime = ~(uint64_t)0;
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            M3x3getRot_ref(in1[i], out[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report the best pass unless averages were requested
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the path under test
    bestTime = ~(uint64_t)0;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
        {
            in1[i].getRotation(out2[i]);
        }
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif//BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3getRot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3getRot_h
#define BulletTest_Test_3x3getRot_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the 3x3 getRot test; returns 0 for success, non-zero for failure.
int Test_3x3getRot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,169 @@
//
// Test_3x3mulM.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3mulM.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
// Four-lane value: three lanes drawn from RANDF_01, last (w) lane BT_NAN.
static inline btSimdFloat4 rand_f4(void)
{
    const btSimdFloat4 v = btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
    return v;
}
// Reference in-place product: overwrites `in` with values built from its
// rows and the tdotx/tdoty/tdotz results of `m`, then returns a copy of `in`.
static btMatrix3x3 M3x3mulM_ref( btMatrix3x3 &in, const btMatrix3x3 &m )
{
    // Snapshot the rows first, because setValue() below overwrites `in`.
    const btVector3 r0 = in[0];
    const btVector3 r1 = in[1];
    const btVector3 r2 = in[2];

    in.setValue(
        m.tdotx(r0), m.tdoty(r0), m.tdotz(r0),
        m.tdotx(r1), m.tdoty(r1), m.tdotz(r1),
        m.tdotx(r2), m.tdoty(r2), m.tdotz(r2));

    return in;
}
// True when every one of the four lanes of `ref` and `other` differs by at
// most SIMD_EPSILON. Lanes are checked from index 3 down to index 0 and the
// check stops at the first lane that is out of tolerance.
static SIMD_FORCE_INLINE bool fuzzyEqualSlow(const btVector3& ref, const btVector3& other)
{
    const btScalar epsilon = SIMD_EPSILON;

    for (int lane = 3; lane >= 0; --lane)
    {
        if (!(btFabs(ref.m_floats[lane]-other.m_floats[lane])<=epsilon))
            return false;
    }
    return true;
}
// Tolerant inequality for matrices: returns 1 as soon as a row both compares
// unequal and fails fuzzyEqualSlow(), 0 when no such row exists.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    for (int row = 0; row < 3; ++row)
    {
        // The fuzzy check only runs when the exact comparison reports a difference.
        if( a.getRow(row) != b.getRow(row) &&
            !fuzzyEqualSlow(a.getRow(row),b.getRow(row)) )
        {
            return 1;
        }
    }
    return 0;
}
// Compares btMatrix3x3 operator*= with M3x3mulM_ref on ARRAY_SIZE random
// matrix pairs, then times both paths over LOOPCOUNT passes and logs the
// per-element result.
// Returns 0 on success, -1 on the first mismatch.
int Test_3x3mulM(void)
{
    // Plain local arrays; nothing in this function sets up guard pages.
    btMatrix3x3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btMatrix3x3 in3[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];

    // Init the data and check every result as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in3[i] = in1[i];    // same left operand for the path under test

        out[i] = M3x3mulM_ref(in1[i], in2[i]);
        out2[i] = (in3[i] *= in2[i]);

        if( out[i] != out2[i] )
        {
            vlog( "Error - M3x3mulM result error! ");
            // i is a size_t: cast so the argument really matches %ld
            vlog( "failure @ %ld\n", (long) i);
            btVector3 m0, m1, m2;
            m0 = out[i].getRow(0);
            m1 = out[i].getRow(1);
            m2 = out[i].getRow(2);
            vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            m0 = out2[i].getRow(0);
            m1 = out2[i].getRow(1);
            m2 = out2[i].getRow(2);
            vlog( "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // NOTE(review): both timed loops multiply in1/in3 in place on every pass,
    // so the operands keep changing across the LOOPCOUNT passes - confirm
    // this is acceptable for the benchmark.

    // Time the reference path
    bestTime = -1LL;    // converts to the largest uint64_t value
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3mulM_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report the best pass unless averages were requested
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the path under test
    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = (in3[i] *= in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3mulM.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3mulM_h
#define BulletTest_Test_3x3mulM_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the 3x3 mulM test; returns 0 for success, non-zero for failure.
int Test_3x3mulM(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,164 @@
//
// Test_3x3mulM1M2.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3mulM1M2.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
// Random x, y, z lanes (RANDF_01) followed by a BT_NAN w lane.
static inline btSimdFloat4 rand_f4(void)
{
    const btSimdFloat4 packed = btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
    return packed;
}
// Reference product of m1 and m2: each output element is the tdotx / tdoty /
// tdotz of m2 applied to one row of m1.
static btMatrix3x3 M3x3mulM1M2_ref( const btMatrix3x3 &m1, const btMatrix3x3 &m2 )
{
    const btVector3 &row0 = m1[0];
    const btVector3 &row1 = m1[1];
    const btVector3 &row2 = m1[2];

    btMatrix3x3 product(
        m2.tdotx(row0), m2.tdoty(row0), m2.tdotz(row0),
        m2.tdotx(row1), m2.tdoty(row1), m2.tdotz(row1),
        m2.tdotx(row2), m2.tdoty(row2), m2.tdotz(row2));
    return product;
}
// Lane-by-lane tolerance check: true only when all four lanes of `ref` and
// `other` are within SIMD_EPSILON of each other. Lanes are visited from
// index 3 down to index 0, stopping at the first failing lane.
static bool fuzzyEqualSlow(const btVector3& ref, const btVector3& other)
{
    const btScalar epsilon = SIMD_EPSILON;
    int lane = 4;

    while (lane-- > 0)
    {
        const bool within = (btFabs(ref.m_floats[lane]-other.m_floats[lane])<=epsilon);
        if (!within)
            return false;
    }
    return true;
}
// Tolerant matrix inequality. A row counts as different only when the exact
// row comparison reports a difference AND fuzzyEqualSlow() rejects it.
// Returns 1 on the first such row, otherwise 0.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    int row = 0;
    while (row < 3)
    {
        if( a.getRow(row) != b.getRow(row) )
        {
            if( !fuzzyEqualSlow(a.getRow(row),b.getRow(row)) )
                return 1;
        }
        ++row;
    }
    return 0;
}
// Compares btMatrix3x3 operator* (matrix * matrix) with M3x3mulM1M2_ref on
// ARRAY_SIZE random matrix pairs, then times both paths over LOOPCOUNT
// passes and logs the per-element result.
// Returns 0 on success, -1 on the first mismatch.
int Test_3x3mulM1M2(void)
{
    // Plain local arrays; nothing in this function sets up guard pages.
    btMatrix3x3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];

    // Init the data and check every result as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = M3x3mulM1M2_ref(in1[i], in2[i]);
        out2[i] = (in1[i] * in2[i]);

        if( out[i] != out2[i] )
        {
            vlog( "Error - M3x3mulM1M2 result error! ");
            // i is a size_t: cast so the argument really matches %ld
            vlog( "failure @ %ld\n", (long) i);
            btVector3 m0, m1, m2;
            m0 = out[i].getRow(0);
            m1 = out[i].getRow(1);
            m2 = out[i].getRow(2);
            vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            m0 = out2[i].getRow(0);
            m1 = out2[i].getRow(1);
            m2 = out2[i].getRow(2);
            vlog( "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the reference path
    bestTime = -1LL;    // converts to the largest uint64_t value
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3mulM1M2_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report the best pass unless averages were requested
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the path under test
    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = (in1[i] * in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3mulM1M2.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3mulM1M2_h
#define BulletTest_Test_3x3mulM1M2_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the 3x3 mulM1M2 test; returns 0 for success, non-zero for failure.
int Test_3x3mulM1M2(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,112 @@
//
// Test_3x3mulMV.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3mulMV.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
// Returns three RANDF_01 lanes plus a BT_NAN fourth (w) lane.
static inline btSimdFloat4 rand_f4(void)
{
    const btSimdFloat4 result = btAssign128(RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
    return result;
}
// Reference matrix * vector: each output component is the dot product of
// one row of `m` with `v`.
static btVector3 M3x3mulMV_ref( const btMatrix3x3 &m, const btVector3 &v )
{
    const btScalar x = m[0].dot(v);
    const btScalar y = m[1].dot(v);
    const btScalar z = m[2].dot(v);
    return btVector3(x, y, z);
}
// Compares btMatrix3x3 * btVector3 with M3x3mulMV_ref on ARRAY_SIZE random
// inputs, then times both paths over LOOPCOUNT passes and logs the
// per-element result.
// Returns 0 on success, 1 on the first mismatch.
int Test_3x3mulMV(void)
{
    // Plain local arrays; nothing in this function sets up guard pages.
    btMatrix3x3 in1[ARRAY_SIZE];
    btVector3 in2[ARRAY_SIZE];
    btVector3 out[ARRAY_SIZE];
    btVector3 out2[ARRAY_SIZE];

    // Init the data and check every result as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btVector3(rand_f4());

        out[i] = M3x3mulMV_ref(in1[i], in2[i]);
        out2[i] = (in1[i] * in2[i]);

        // NOTE(review): if any lane difference is NaN the sum is NaN and this
        // comparison is false, so such a mismatch would not be reported -
        // confirm that is acceptable.
        if( fabsf(out[i].m_floats[0] - out2[i].m_floats[0]) +
            fabsf(out[i].m_floats[1] - out2[i].m_floats[1]) +
            fabsf(out[i].m_floats[2] - out2[i].m_floats[2]) +
            fabsf(out[i].m_floats[3] - out2[i].m_floats[3]) > FLT_EPSILON*4 )
        {
            vlog( "Error - M3x3mulMV result error! ");
            // i is a size_t: cast so the argument really matches %ld
            vlog( "failure @ %ld\n", (long) i);
            vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  out[i].m_floats[0], out[i].m_floats[1], out[i].m_floats[2], out[i].m_floats[3],
                  out2[i].m_floats[0], out2[i].m_floats[1], out2[i].m_floats[2], out2[i].m_floats[3]);
            return 1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the reference path
    bestTime = -1LL;    // converts to the largest uint64_t value
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3mulMV_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report the best pass unless averages were requested
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the path under test
    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = (in1[i] * in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,23 @@
//
// Test_3x3mulMV.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3mulMV_h
#define BulletTest_Test_3x3mulMV_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the 3x3 mulMV test; returns 0 for success, non-zero for failure.
int Test_3x3mulMV(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,112 @@
//
// Test_3x3mulVM.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3mulVM.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
// Lanes 0..2 are filled from RANDF_01; lane 3 (w) is BT_NAN.
static inline btSimdFloat4 rand_f4(void)
{
    const btSimdFloat4 quad = btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
    return quad;
}
// Reference vector * matrix: the output components are m.tdotx(v),
// m.tdoty(v) and m.tdotz(v), in that order.
static btVector3 M3x3mulVM_ref( const btVector3 &v, const btMatrix3x3 &m)
{
    const btScalar x = m.tdotx(v);
    const btScalar y = m.tdoty(v);
    const btScalar z = m.tdotz(v);
    return btVector3(x, y, z);
}
// Compares btVector3 * btMatrix3x3 with M3x3mulVM_ref on ARRAY_SIZE random
// inputs, then times both paths over LOOPCOUNT passes and logs the
// per-element result.
// Returns 0 on success, 1 on the first mismatch.
int Test_3x3mulVM(void)
{
    // Plain local arrays; nothing in this function sets up guard pages.
    btVector3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btVector3 out[ARRAY_SIZE];
    btVector3 out2[ARRAY_SIZE];

    // Init the data and check every result as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btVector3(rand_f4());
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = M3x3mulVM_ref(in1[i], in2[i]);
        out2[i] = (in1[i] * in2[i]);

        // NOTE(review): if any lane difference is NaN the sum is NaN and this
        // comparison is false, so such a mismatch would not be reported -
        // confirm that is acceptable.
        if( fabsf(out[i].m_floats[0] - out2[i].m_floats[0]) +
            fabsf(out[i].m_floats[1] - out2[i].m_floats[1]) +
            fabsf(out[i].m_floats[2] - out2[i].m_floats[2]) +
            fabsf(out[i].m_floats[3] - out2[i].m_floats[3]) > FLT_EPSILON*4 )
        {
            vlog( "Error - M3x3mulVM result error! ");
            // i is a size_t: cast so the argument really matches %ld
            vlog( "failure @ %ld\n", (long) i);
            vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                  "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                  out[i].m_floats[0], out[i].m_floats[1], out[i].m_floats[2], out[i].m_floats[3],
                  out2[i].m_floats[0], out2[i].m_floats[1], out2[i].m_floats[2], out2[i].m_floats[3]);
            return 1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the reference path
    bestTime = -1LL;    // converts to the largest uint64_t value
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3mulVM_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report the best pass unless averages were requested
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the path under test
    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = (in1[i] * in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3mulVM.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3mulVM_h
#define BulletTest_Test_3x3mulVM_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the 3x3 mulVM test; returns 0 for success, non-zero for failure.
int Test_3x3mulVM(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,171 @@
//
// Test_3x3setRot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3setRot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
// Matrix-row generator: three RANDF_01 lanes and a BT_NAN w lane.
static inline btSimdFloat4 rand_f4(void)
{
    const btSimdFloat4 row = btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN ); // w channel NaN
    return row;
}
// Quaternion generator: all four lanes are drawn from RANDF_01.
static inline btSimdFloat4 qtrand_f4(void)
{
    const btSimdFloat4 quat = btAssign128( RANDF_01, RANDF_01, RANDF_01, RANDF_01 );
    return quat;
}
// Reference quaternion-to-matrix conversion: fills `m` from `q` with a
// single setValue() call and returns a copy of `m`.
static btMatrix3x3 M3x3setRot_ref( btMatrix3x3 &m, const btQuaternion &q )
{
    const btScalar d = q.length2();
    const btScalar s = btScalar(2.0) / d;   // scale applied to every product below

    const btScalar xs = q.x() * s;
    const btScalar ys = q.y() * s;
    const btScalar zs = q.z() * s;

    const btScalar wx = q.w() * xs;
    const btScalar wy = q.w() * ys;
    const btScalar wz = q.w() * zs;

    const btScalar xx = q.x() * xs;
    const btScalar xy = q.x() * ys;
    const btScalar xz = q.x() * zs;

    const btScalar yy = q.y() * ys;
    const btScalar yz = q.y() * zs;
    const btScalar zz = q.z() * zs;

    m.setValue(
        btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
        xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
        xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
    return m;
}
// Tolerant matrix inequality over the first three lanes of each row:
// returns 1 as soon as a row's summed absolute differences exceed
// FLT_EPSILON * 4, otherwise 0. Lane 3 is not compared.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    int row = 0;
    while (row < 3)
    {
        const btVector3 lhs = a.getRow(row);
        const btVector3 rhs = b.getRow(row);

        const bool tooFar =
            ( fabs(lhs.m_floats[0] - rhs.m_floats[0]) +
              fabs(lhs.m_floats[1] - rhs.m_floats[1]) +
              fabs(lhs.m_floats[2] - rhs.m_floats[2]) > FLT_EPSILON * 4 );
        if (tooFar)
            return 1;

        ++row;
    }
    return 0;
}
// Compares btMatrix3x3::setRotation() with M3x3setRot_ref on ARRAY_SIZE
// random quaternions, then times both paths over LOOPCOUNT passes and logs
// the per-element result.
// Returns 0 on success, -1 on the first mismatch.
int Test_3x3setRot(void)
{
    // Plain local arrays; nothing in this function sets up guard pages.
    btMatrix3x3 in1[ARRAY_SIZE];
    btQuaternion in2[ARRAY_SIZE];
    btMatrix3x3 in3[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];

    // Init the data and check every result as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btQuaternion(qtrand_f4());
        in3[i] = in1[i];    // same starting matrix for the path under test

        out[i] = M3x3setRot_ref(in1[i], in2[i]);
        in3[i].setRotation(in2[i]);
        out2[i] = in3[i];

        if( out[i] != out2[i] )
        {
            vlog( "Error - M3x3setRot result error! ");
            // i is a size_t: cast so the argument really matches %ld
            vlog( "failure @ %ld\n", (long) i);
            btVector3 m0, m1, m2;
            m0 = out[i].getRow(0);
            m1 = out[i].getRow(1);
            m2 = out[i].getRow(2);
            vlog( "\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\n (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\n (%10.7f, %10.7f, %10.7f, %10.7f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            m0 = out2[i].getRow(0);
            m1 = out2[i].getRow(1);
            m2 = out2[i].getRow(2);
            vlog( "\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\n (%10.7f, %10.7f, %10.7f, %10.7f) "
                  "\n (%10.7f, %10.7f, %10.7f, %10.7f) \n",
                  m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                  m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                  m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the reference path
    bestTime = -1LL;    // converts to the largest uint64_t value
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3setRot_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report the best pass unless averages were requested
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the path under test
    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
        {
            in3[i].setRotation(in2[i]);
            out2[i] = in3[i];
        }
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3setRot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3setRot_h
#define BulletTest_Test_3x3setRot_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the 3x3 setRot test; returns 0 for success, non-zero for failure.
int Test_3x3setRot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,117 @@
//
// Test_3x3timesTranspose.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3timesTranspose.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
// Three RANDF lanes followed by a BT_NAN w lane.
static inline btSimdFloat4 rand_f4(void)
{
    const btSimdFloat4 sample = btAssign128( RANDF, RANDF, RANDF, BT_NAN ); // w channel NaN
    return sample;
}
// Reference result: element (r, c) is the dot product of row r of `in`
// with row c of `m`.
static btMatrix3x3 timesTranspose( const btMatrix3x3 &in, const btMatrix3x3 &m )
{
    // Local copies of the rows of `in`
    const btVector3 a0 = in[0];
    const btVector3 a1 = in[1];
    const btVector3 a2 = in[2];

    btMatrix3x3 result(
        a0.dot(m[0]), a0.dot(m[1]), a0.dot(m[2]),
        a1.dot(m[0]), a1.dot(m[1]), a1.dot(m[2]),
        a2.dot(m[0]), a2.dot(m[1]), a2.dot(m[2]));
    return result;
}
// Exact matrix inequality: 1 if any of the three rows compares unequal
// (rows are checked in order 0, 1, 2), otherwise 0.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    const bool anyRowDiffers =
        ( a.getRow(0) != b.getRow(0) ) ||
        ( a.getRow(1) != b.getRow(1) ) ||
        ( a.getRow(2) != b.getRow(2) );

    return anyRowDiffers ? 1 : 0;
}
// Compares btMatrix3x3::timesTranspose() with the local timesTranspose()
// reference on ARRAY_SIZE random matrix pairs, then times both paths over
// LOOPCOUNT passes and logs the per-element result.
// Returns 0 on success, -1 on the first mismatch.
int Test_3x3timesTranspose(void)
{
    // Plain local arrays; nothing in this function sets up guard pages.
    btMatrix3x3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];

    // Init the data and check every result as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = timesTranspose(in1[i], in2[i]);
        out2[i] = in1[i].timesTranspose(in2[i]);

        if( out[i] != out2[i] )
        {
            // Logged through vlog like the rest of this function and the
            // other tests; i is a size_t, so it is cast to match %ld.
            vlog( "failure @ %ld\n", (long) i);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the reference path
    bestTime = -1LL;    // converts to the largest uint64_t value
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = timesTranspose(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report the best pass unless averages were requested
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the path under test; results go to out2, matching the
    // verification loop above.
    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = in1[i].timesTranspose(in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3timesTranspose.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3timesTranspose_h
#define BulletTest_Test_3x3timesTranspose_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the 3x3 timesTranspose test; returns 0 for success, non-zero for failure.
int Test_3x3timesTranspose(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,116 @@
//
// Test_3x3transpose.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3transpose.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 1024
// x, y and z lanes come from RANDF; the w lane is BT_NAN.
static inline btSimdFloat4 rand_f4(void)
{
    const btSimdFloat4 xyzw = btAssign128( RANDF, RANDF, RANDF, BT_NAN ); // w channel NaN
    return xyzw;
}
// Reference transpose: gathers the x, y and z components of the three rows
// of `in` into three new vectors (fourth lane 0) and builds the result
// matrix from them.
static btMatrix3x3 Transpose( btMatrix3x3 &in )
{
    const btVector3 r0 = in.getRow(0);
    const btVector3 r1 = in.getRow(1);
    const btVector3 r2 = in.getRow(2);

    const btVector3 xs = btAssign128(r0.x(), r1.x(), r2.x(), 0 );
    const btVector3 ys = btAssign128(r0.y(), r1.y(), r2.y(), 0 );
    const btVector3 zs = btAssign128(r0.z(), r1.z(), r2.z(), 0);

    return btMatrix3x3( xs, ys, zs);
}
// Exact matrix inequality: walks rows 0..2 and returns 1 at the first row
// that compares unequal, 0 when all three rows match.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    for (int row = 0; row < 3; ++row)
    {
        if( a.getRow(row) != b.getRow(row) )
            return 1;
    }
    return 0;
}
// Compares btMatrix3x3::transpose() with the local Transpose() reference on
// ARRAY_SIZE random matrices, then times both paths over LOOPCOUNT passes
// and logs the per-element result.
// Returns 0 on success, -1 on the first mismatch.
int Test_3x3transpose(void)
{
    // Plain local arrays; nothing in this function sets up guard pages.
    btMatrix3x3 in[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];

    // Init the data and check every result as it is produced
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = Transpose(in[i]);
        out2[i] = in[i].transpose();

        if( out[i] != out2[i] )
        {
            // Logged through vlog like the rest of this function and the
            // other tests; i is a size_t, so it is cast to match %ld.
            vlog( "failure @ %ld\n", (long) i);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the reference path
    bestTime = -1LL;    // converts to the largest uint64_t value
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = Transpose(in[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report the best pass unless averages were requested
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the path under test; results go to out2, matching the
    // verification loop above.
    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = in[i].transpose();
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3transpose.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3transpose_h
#define BulletTest_Test_3x3transpose_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_3x3transpose(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,168 @@
//
// Test_3x3transposeTimes.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_3x3transposeTimes.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btMatrix3x3.h>
#define LOOPCOUNT 1000
#define ARRAY_SIZE 128
// Builds one SIMD float4 whose first three lanes come from RANDF_01 and whose
// fourth (w) lane is BT_NAN.
static inline btSimdFloat4 rand_f4(void)
{
    const btSimdFloat4 lanes = btAssign128( RANDF_01, RANDF_01, RANDF_01, BT_NAN );
    return lanes;
}
// Scalar reference for transposeTimes(): entry (r, c) of the result is the
// sum over k of in[k][r] * m[k][c], i.e. column r of `in` dotted with column
// c of `m`. The fourth lane of every result row is set to 0.
static btMatrix3x3 TransposeTimesReference( const btMatrix3x3 &in, const btMatrix3x3 &m )
{
    // Rows of the left-hand matrix.
    const btVector3 &a0 = in[0];
    const btVector3 &a1 = in[1];
    const btVector3 &a2 = in[2];

    // Result row 0: x components of the rows of `in`.
    const btSimdFloat4 row0 = btAssign128(
        a0.x() * m[0].x() + a1.x() * m[1].x() + a2.x() * m[2].x(),
        a0.x() * m[0].y() + a1.x() * m[1].y() + a2.x() * m[2].y(),
        a0.x() * m[0].z() + a1.x() * m[1].z() + a2.x() * m[2].z(),
        0.0f );

    // Result row 1: y components of the rows of `in`.
    const btSimdFloat4 row1 = btAssign128(
        a0.y() * m[0].x() + a1.y() * m[1].x() + a2.y() * m[2].x(),
        a0.y() * m[0].y() + a1.y() * m[1].y() + a2.y() * m[2].y(),
        a0.y() * m[0].z() + a1.y() * m[1].z() + a2.y() * m[2].z(),
        0.0f );

    // Result row 2: z components of the rows of `in`.
    const btSimdFloat4 row2 = btAssign128(
        a0.z() * m[0].x() + a1.z() * m[1].x() + a2.z() * m[2].x(),
        a0.z() * m[0].y() + a1.z() * m[1].y() + a2.z() * m[2].y(),
        a0.z() * m[0].z() + a1.z() * m[1].z() + a2.z() * m[2].z(),
        0.0f );

    return btMatrix3x3( row0, row1, row2 );
}
// Row-wise matrix inequality: returns 1 as soon as any of the three rows
// differ (per btVector3's operator!=), 0 when all three rows compare equal.
static int operator!= ( const btMatrix3x3 &a, const btMatrix3x3 &b )
{
    for( int row = 0; row < 3; row++ )
    {
        if( a.getRow(row) != b.getRow(row) )
            return 1;
    }
    return 0;
}
// Checks btMatrix3x3::transposeTimes() against TransposeTimesReference() on
// ARRAY_SIZE random matrix pairs, tolerating a relative error of up to 1e-6
// per entry, then times both implementations and logs the per-matrix cost.
// Returns 0 on success and -1 on the first out-of-tolerance result.
int Test_3x3transposeTimes(void)
{
    // Plain stack arrays; no guard pages are set up in this test.
    btMatrix3x3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];
    btMatrix3x3 out2[ARRAY_SIZE];
    float maxRelativeError = 0.f;

    // Init the data and validate each element as it is generated
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        out[i] = TransposeTimesReference(in1[i], in2[i]);
        out2[i] = in1[i].transposeTimes(in2[i]);
        if( out[i] != out2[i] )
        {
            // Not bit-identical: find the largest relative error over the 3x3 entries.
            float relativeError = 0.f;
            for (int column=0;column<3;column++)
                for (int row=0;row<3;row++)
                {
                    const float reference = out[i][row][column];
                    const float difference = btFabs(out2[i][row][column] - reference);
                    // Identical entries contribute no error; skipping them also avoids 0/0.
                    if (difference == 0.f)
                        continue;
                    // Divide by the magnitude of the reference so that a negative
                    // reference value cannot yield a negative "error" that hides a mismatch.
                    relativeError = btMax(relativeError, difference / btFabs(reference));
                }
            if (relativeError>1e-6)
            {
                // i is a size_t; cast it so the argument matches the %ld conversion.
                vlog( "failure @ %ld\n", (long) i);
                btVector3 m0, m1, m2;
                m0 = out[i].getRow(0);
                m1 = out[i].getRow(1);
                m2 = out[i].getRow(2);
                vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                "\n (%10.4f, %10.4f, %10.4f, %10.4f) "
                "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
                m0 = out2[i].getRow(0);
                m1 = out2[i].getRow(1);
                m2 = out2[i].getRow(2);
                vlog( "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) "
                "\n (%10.4f, %10.4f, %10.4f, %10.4f) "
                "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);
                return -1;
            } else
            {
                // Within tolerance: remember the worst case for the warning below.
                if (relativeError>maxRelativeError)
                    maxRelativeError = relativeError;
            }
        }
    }
    if (maxRelativeError)
    {
        printf("Warning: maxRelativeError = %e\n",maxRelativeError);
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the scalar reference. bestTime starts at the largest uint64_t
    // value (-1 converted to unsigned) so the first pass always replaces it.
    bestTime = -1LL;
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++) {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = TransposeTimesReference(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report the best pass unless average times were requested.
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the implementation under test the same way.
    bestTime = -1LL;
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++) {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = in1[i].transposeTimes(in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    // Each timed pass covers ARRAY_SIZE matrices, so divide to get a per-matrix figure.
    vlog( "Timing:\n" );
    vlog( "\t scalar\t vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );
    return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_3x3transposeTimes.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_3x3transposeTimes_h
#define BulletTest_Test_3x3transposeTimes_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_3x3transposeTimes(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,495 @@
//
// Test_btDbvt.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc., Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_btDbvt.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <BulletCollision/BroadphaseCollision/btDbvt.h>
// reference code for testing purposes
// Scalar reference overlap test: true only when the [min, max] intervals of
// a and b overlap (inclusive) on each of the x, y and z axes.
SIMD_FORCE_INLINE bool Intersect_ref( btDbvtAabbMm& a, btDbvtAabbMm& b)
{
    const btVector3 &aLo = a.tMins();
    const btVector3 &aHi = a.tMaxs();
    const btVector3 &bLo = b.tMins();
    const btVector3 &bHi = b.tMaxs();

    // The conditions are negated rather than flipped so that unordered (NaN)
    // comparisons still produce "no intersection".
    if( !(aLo.x() <= bHi.x()) ) return false;
    if( !(aHi.x() >= bLo.x()) ) return false;
    if( !(aLo.y() <= bHi.y()) ) return false;
    if( !(aHi.y() >= bLo.y()) ) return false;
    if( !(aLo.z() <= bHi.z()) ) return false;
    if( !(aHi.z() >= bLo.z()) ) return false;
    return true;
}
// Scalar reference proximity metric: the sum of the absolute x, y and z
// components of (a.min + a.max) - (b.min + b.max).
SIMD_FORCE_INLINE btScalar Proximity_ref( btDbvtAabbMm& a,
                                          btDbvtAabbMm& b)
{
    const btVector3 aSum = a.tMins() + a.tMaxs();
    const btVector3 bSum = b.tMins() + b.tMaxs();
    const btVector3 delta = aSum - bSum;
    return btFabs(delta.x()) + btFabs(delta.y()) + btFabs(delta.z());
}
// Scalar reference selection: returns 0 when o is strictly closer (by
// Proximity_ref) to a than to b, otherwise 1.
SIMD_FORCE_INLINE int Select_ref( btDbvtAabbMm& o,
                                  btDbvtAabbMm& a,
                                  btDbvtAabbMm& b)
{
    const btScalar toA = Proximity_ref(o, a);
    const btScalar toB = Proximity_ref(o, b);
    if( toA < toB )
        return 0;
    return 1;
}
// Scalar reference merge: writes into r the component-wise minimum of the two
// min corners and the component-wise maximum of the two max corners.
SIMD_FORCE_INLINE void Merge_ref( btDbvtAabbMm& a,
                                  btDbvtAabbMm& b,
                                  btDbvtAabbMm& r)
{
    // Only the first three components are merged. Extending this to four (to
    // mirror vector code that touches all four floats) was deliberately
    // rejected: the 4th component is ignored and not computed by non-vector
    // code (there is no NEON version, and the scalar path has 3 components).
    for( int axis = 0; axis < 3; ++axis )
    {
        const btScalar aLo = a.tMins().m_floats[axis];
        const btScalar bLo = b.tMins().m_floats[axis];
        r.tMins().m_floats[axis] = (aLo < bLo) ? aLo : bLo;

        const btScalar aHi = a.tMaxs().m_floats[axis];
        const btScalar bHi = b.tMaxs().m_floats[axis];
        r.tMaxs().m_floats[axis] = (aHi > bHi) ? aHi : bHi;
    }
}
/*
[0] float32_t 0.0318338
[1] float32_t 0.0309355
[2] float32_t 0.93264
[3] float32_t 0.88788
[0] float32_t 0.59133
[1] float32_t 0.478779
[2] float32_t 0.833354
[3] float32_t 0.186335
[0] float32_t 0.242578
[1] float32_t 0.0134696
[2] float32_t 0.383139
[3] float32_t 0.414653
[0] float32_t 0.067769
[1] float32_t 0.993127
[2] float32_t 0.484308
[3] float32_t 0.765338
*/
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
#define DATA_SIZE 1024
int Test_btDbvt(void)
{
btDbvtAabbMm a[DATA_SIZE], b[DATA_SIZE], c[DATA_SIZE];
btDbvtAabbMm a_ref[DATA_SIZE], b_ref[DATA_SIZE], c_ref[DATA_SIZE];
int i;
bool Intersect_Test_Res[DATA_SIZE], Intersect_Ref_Res[DATA_SIZE];
int Select_Test_Res[DATA_SIZE], Select_Ref_Res[DATA_SIZE];
for (i = 0; i < DATA_SIZE; i++)
{
a[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
a[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
a[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
b[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
b[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
c[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
c[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
a_ref[i].tMins().m_floats[0] = a[i].tMins().m_floats[0];
a_ref[i].tMins().m_floats[1] = a[i].tMins().m_floats[1];
a_ref[i].tMins().m_floats[2] = a[i].tMins().m_floats[2];
a_ref[i].tMins().m_floats[3] = a[i].tMins().m_floats[3];
a_ref[i].tMaxs().m_floats[0] = a[i].tMaxs().m_floats[0];
a_ref[i].tMaxs().m_floats[1] = a[i].tMaxs().m_floats[1];
a_ref[i].tMaxs().m_floats[2] = a[i].tMaxs().m_floats[2];
a_ref[i].tMaxs().m_floats[3] = a[i].tMaxs().m_floats[3];
b_ref[i].tMins().m_floats[0] = b[i].tMins().m_floats[0];
b_ref[i].tMins().m_floats[1] = b[i].tMins().m_floats[1];
b_ref[i].tMins().m_floats[2] = b[i].tMins().m_floats[2];
b_ref[i].tMins().m_floats[3] = b[i].tMins().m_floats[3];
b_ref[i].tMaxs().m_floats[0] = b[i].tMaxs().m_floats[0];
b_ref[i].tMaxs().m_floats[1] = b[i].tMaxs().m_floats[1];
b_ref[i].tMaxs().m_floats[2] = b[i].tMaxs().m_floats[2];
b_ref[i].tMaxs().m_floats[3] = b[i].tMaxs().m_floats[3];
c_ref[i].tMins().m_floats[0] = c[i].tMins().m_floats[0];
c_ref[i].tMins().m_floats[1] = c[i].tMins().m_floats[1];
c_ref[i].tMins().m_floats[2] = c[i].tMins().m_floats[2];
c_ref[i].tMins().m_floats[3] = c[i].tMins().m_floats[3];
c_ref[i].tMaxs().m_floats[0] = c[i].tMaxs().m_floats[0];
c_ref[i].tMaxs().m_floats[1] = c[i].tMaxs().m_floats[1];
c_ref[i].tMaxs().m_floats[2] = c[i].tMaxs().m_floats[2];
c_ref[i].tMaxs().m_floats[3] = c[i].tMaxs().m_floats[3];
}
#if 1
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Test_Res[i] = Intersect(a[i], b[i]);
Intersect_Ref_Res[i] = Intersect_ref(a_ref[i], b_ref[i]);
if(Intersect_Test_Res[i] != Intersect_Ref_Res[i])
{
printf("Diff on %d\n", i);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
}
}
#endif
uint64_t scalarTime;
uint64_t vectorTime;
size_t j;
////////////////////////////////////
//
// Time and Test Intersect
//
////////////////////////////////////
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Ref_Res[i] = Intersect_ref(a_ref[i], b_ref[i]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Intersect_Test_Res[i] = Intersect(a[i], b[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Intersect Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
for (i = 0; i < DATA_SIZE; i++)
{
if(Intersect_Test_Res[i] != Intersect_Ref_Res[i])
{
printf("Intersect fail at %d\n", i);
return 1;
}
}
////////////////////////////////////
//
// Time and Test Merge
//
////////////////////////////////////
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Merge_ref(a_ref[i], b_ref[i], c_ref[i]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Merge(a[i], b[i], c[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Merge Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
/*
c [0] float32_t 0.00455523
[1] float32_t 0.559712
[2] float32_t 0.0795838
[3] float32_t 0.10182
c_ref
[0] float32_t 0.00455523
[1] float32_t 0.559712
[2] float32_t 0.0795838
[3] float32_t 0.552081
c [0] float32_t 0.829904
[1] float32_t 0.692891
[2] float32_t 0.961654
[3] float32_t 0.666956
c_ref
[0] float32_t 0.829904
[1] float32_t 0.692891
[2] float32_t 0.961654
[3] float32_t 0.522878
*/
for (i = 0; i < DATA_SIZE; i++)
{
//ignore 4th component because it is not computed in all code-paths
if( (fabs(c[i].tMaxs().m_floats[0] - c_ref[i].tMaxs().m_floats[0]) > 0.001) ||
(fabs(c[i].tMaxs().m_floats[1] - c_ref[i].tMaxs().m_floats[1]) > 0.001) ||
(fabs(c[i].tMaxs().m_floats[2] - c_ref[i].tMaxs().m_floats[2]) > 0.001) ||
// (fabs(c[i].tMaxs().m_floats[3] - c_ref[i].tMaxs().m_floats[3]) > 0.001) ||
(fabs(c[i].tMins().m_floats[0] - c_ref[i].tMins().m_floats[0]) > 0.001) ||
(fabs(c[i].tMins().m_floats[1] - c_ref[i].tMins().m_floats[1]) > 0.001) ||
(fabs(c[i].tMins().m_floats[2] - c_ref[i].tMins().m_floats[2]) > 0.001)
//|| (fabs(c[i].tMins().m_floats[3] - c_ref[i].tMins().m_floats[3]) > 0.001)
)
//if((c[i].tMaxs().m_floats[0] != c_ref[i].tMaxs().m_floats[0]) || (c[i].tMaxs().m_floats[1] != c_ref[i].tMaxs().m_floats[1]) || (c[i].tMaxs().m_floats[2] != c_ref[i].tMaxs().m_floats[2]) || (c[i].tMaxs().m_floats[3] != c_ref[i].tMaxs().m_floats[3]) || (c[i].tMins().m_floats[0] != c_ref[i].tMins().m_floats[0]) || (c[i].tMins().m_floats[1] != c_ref[i].tMins().m_floats[1]) || (c[i].tMins().m_floats[2] != c_ref[i].tMins().m_floats[2]) || (c[i].tMins().m_floats[3] != c_ref[i].tMins().m_floats[3]))
{
printf("Merge fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
return 1;
}
}
////////////////////////////////////
//
// Time and Test Select
//
////////////////////////////////////
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Select_Ref_Res[i] = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (i = 0; i < DATA_SIZE; i++)
{
Select_Test_Res[i] = Select(a[i], b[i], c[i]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Select Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
//printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
for (i = 0; i < DATA_SIZE; i++)
{
Select_Ref_Res[i] = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
Select_Test_Res[i] = Select(a[i], b[i], c[i]);
if(Select_Test_Res[i] != Select_Ref_Res[i])
{
printf("Select fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
return 1;
}
}
return 0;
}
#endif

View File

@@ -0,0 +1,21 @@
//
// Test_btDbvt.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc., Inc.
//
#ifndef BulletTest_Test_btDbvt_h
#define BulletTest_Test_btDbvt_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_btDbvt(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,153 @@
//
// Test_v3dot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_dot3.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static btVector3 dot3_ref( const btVector3 &v, const btVector3 &v1, const btVector3 &v2, const btVector3 &v3);

// Scalar reference for dot3(): returns a vector whose x, y and z are the dot
// products of v with v1, v2 and v3 respectively.
static btVector3 dot3_ref( const btVector3 &v, const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
{
    const btScalar d1 = v.dot(v1);
    const btScalar d2 = v.dot(v2);
    const btScalar d3 = v.dot(v3);
    return btVector3( d1, d2, d3 );
}
/*
SIMD_FORCE_INLINE int operator!=(const btVector3 &s, const btVector3 &v)
{
#ifdef __SSE__
__m128 test = _mm_cmpneq_ps( s.mVec128, v.mVec128 );
return (_mm_movemask_ps( test ) & 7) != 0;
#elif defined __ARM_NEON_H
uint32x4_t test = vandq_u32( vceqq_f32( s.mVec128, v.mVec128 ), (uint32x4_t){-1,-1,-1,0});
uint32x2_t t = vpadd_u32( vget_low_u32(test), vget_high_u32(test));
t = vpadd_u32(t, t);
return -3 != (int32_t) vget_lane_u32(t, 0);
#else
return s.m_floats[0] != v.m_floats[0] ||
s.m_floats[1] != v.m_floats[1] ||
s.m_floats[2] != v.m_floats[2];
#endif
}
*/
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
int Test_dot3(void)
{
btVector3 v, v1, v2, v3;
#define DATA_SIZE 1024
btVector3 vec3_arr[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
btVector3 vec3_arr3[DATA_SIZE];
btVector3 res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
btVector3 correct, test;
for( k = 0; k < DATA_SIZE; k++ )
{
vec3_arr[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
vec3_arr1[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
vec3_arr2[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN ));
vec3_arr3[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
correct = dot3_ref(vec3_arr[k], vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
test = vec3_arr[k].dot3( vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
if( correct != test )
{
vlog( "Error (%ld) - dot3 result error! *{%a, %a, %a, %a} != {%a, %a, %a, %a} \n", k,
correct.x(), correct.y(), correct.z(), correct.w(),
test.x(), test.y(), test.z(), test.w() );
return 1;
}
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t k32 = (k & (DATA_SIZE-1));
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t k32 = (k & (DATA_SIZE-1));
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_dot3.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_dot3_h
#define BulletTest_Test_dot3_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_dot3(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,281 @@
//
// Test_maxdot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_maxdot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static long maxdot_ref( const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult );
#ifdef __arm__
#define MAX_LOG2_SIZE 9
#else
#define MAX_LOG2_SIZE 10
#endif
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
#define LOOPCOUNT 10
int Test_maxdot(void)
{
// Init an array flanked by guard pages
btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL );
float *fp = (float*) data;
long correct, test;
btVector3 localScaling( 0.1f, 0.2f, 0.3f);
size_t size;
// Init the data
size_t i;
for( i = 0; i < MAX_SIZE; i++ )
{
fp[4*i] = (int32_t) RANDF_16;
fp[4*i+1] = (int32_t) RANDF_16;
fp[4*i+2] = (int32_t) RANDF_16;
fp[4*i+3] = BT_NAN; // w channel NaN
}
float correctDot, testDot;
fp = (float*) localScaling;
float maxRelativeError = 0.f;
for( size = 1; size <= MAX_SIZE; size++ )
{
float *in = (float*)(data + MAX_SIZE - size);
size_t position;
for( position = 0; position < size; position++ )
{
float *biggest = in + position * 4;
float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] };
biggest[0] += LARGE_FLOAT17;
biggest[1] += LARGE_FLOAT17;
biggest[2] += LARGE_FLOAT17;
biggest[3] += LARGE_FLOAT17;
correctDot = BT_NAN;
testDot = BT_NAN;
correct = maxdot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
test = localScaling.maxDot( (btVector3*) in, size, testDot);
if( test < 0 || test >= size )
{
vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
continue;
}
if( correct != test )
{
vlog( "Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
return 1;
}
if( test != position )
{
vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2],
fp[0] * in[4*position] + fp[1] * in[4*position+1] + fp[2] * in[4*position+2] );
return 1;
}
if( correctDot != testDot )
{
float relativeError = btFabs((testDot - correctDot) / correctDot);
if (relativeError>1e-6)
{
vlog( "Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
return 1;
} else
{
if (maxRelativeError < relativeError)
{
maxRelativeError = relativeError;
#ifdef VERBOSE_WARNING
sprintf(errStr,"Warning @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2]);
#endif //VERBOSE_WARNING
}
}
}
memcpy( biggest, old, 16 );
}
}
if (maxRelativeError)
{
printf("Warning: relative error = %e\n", maxRelativeError);
#ifdef VERBOSE_WARNING
vlog(errStr);
#endif
}
uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)];
uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)];
size_t j, k;
float *in = (float*) data;
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += maxdot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
scalarTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTimes[size] = bestTime;
else
scalarTimes[size] /= 100;
}
uint64_t *timep = &scalarTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += maxdot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.maxDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
vectorTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTimes[size] = bestTime;
else
vectorTimes[size] /= 100;
}
timep = &vectorTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.maxDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
vlog( "Timing:\n" );
vlog( " size\t scalar\t vector\n" );
for( size = 1; size <= 32; size++ )
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT );
size_t index = 33;
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT );
index++;
}
// Useless check to make sure that the timing loops are not optimized away
if( test != correct )
vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test);
GuardFree(data);
return 0;
}
// Scalar reference for btVector3::maxDot, used to validate the vectorized path.
//
// vertices  : `count` elements of four floats each; only the first three floats
//             of every element take part in the dot product.
// vec       : at least three floats (x, y, z) dotted against every vertex.
// count     : number of vertices to scan.
// dotResult : receives the largest dot product found; stays at -BT_INFINITY
//             when no dot product compared greater than that.
//
// Returns the index of the first vertex holding the largest dot product, or -1
// when no vertex was selected (for example when count is 0).
static long maxdot_ref( const btSimdFloat4 *vertices,
                        float *vec,
                        size_t count,
                        float *dotResult )
{
    const float *cursor = (const float*) vertices;
    float best = -BT_INFINITY;
    long bestIndex = -1;

    for( long n = 0; n < count; n++, cursor += 4 )
    {
        const float candidate = vec[0] * cursor[0] + vec[1] * cursor[1] + vec[2] * cursor[2];
        if( !(candidate > best) )
            continue;   // ties and NaN never displace the current winner
        best = candidate;
        bestIndex = n;
    }

    *dotResult = best;
    return bestIndex;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_maxdot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
// Include guard for the Test_maxdot declaration.
#ifndef BulletTest_Test_maxdot_h
#define BulletTest_Test_maxdot_h
// Give the entry point C linkage when this header is included from C++.
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the maxDot test. Takes no arguments and returns an int
// status: 0 when the run completes, 1 when a mismatch is detected.
int Test_maxdot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,269 @@
//
// Test_mindot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_mindot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
// Scalar implementation that Test_mindot compares btVector3::minDot against;
// the definition follows Test_mindot later in this file.
static long mindot_ref( const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult );
// log2 of the largest vertex count exercised. Both branches currently select
// the same value, so the __arm__ check has no effect.
#ifdef __arm__
#define MAX_LOG2_SIZE 9
#else
#define MAX_LOG2_SIZE 9
#endif
// Largest vertex count exercised; also the element count of the test buffer.
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
// Number of back-to-back calls inside each timed sample.
#define LOOPCOUNT 100
int Test_mindot(void)
{
// Init an array flanked by guard pages
btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL );
float *fp = (float*) data;
long correct, test;
btVector3 localScaling( 0.1f, 0.2f, 0.3f);
size_t size;
// Init the data
size_t i;
for( i = 0; i < MAX_SIZE; i++ )
{
fp[4*i] = (int32_t) RANDF_16;
fp[4*i+1] = (int32_t) RANDF_16;
fp[4*i+2] = (int32_t) RANDF_16;
fp[4*i+3] = BT_NAN; // w channel NaN
}
float correctDot, testDot;
fp = (float*) localScaling;
float maxRelativeError = 0.f;
for( size = 1; size <= MAX_SIZE; size++ )
{
float *in = (float*)(data + MAX_SIZE - size);
size_t position;
for( position = 0; position < size; position++ )
{
float *biggest = in + position * 4;
float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] };
biggest[0] -= LARGE_FLOAT17;
biggest[1] -= LARGE_FLOAT17;
biggest[2] -= LARGE_FLOAT17;
biggest[3] -= LARGE_FLOAT17;
correctDot = BT_NAN;
testDot = BT_NAN;
correct = mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
test = localScaling.minDot( (btVector3*) in, size, testDot);
if( test < 0 || test >= size )
{
vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
continue;
}
if( correct != test )
{
vlog( "Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
return 1;
}
if( test != position )
{
vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2],
fp[0] * in[4*position] + fp[1] * in[4*position+1] + fp[2] * in[4*position+2] );
return 1;
}
if( correctDot != testDot )
{
float relativeError = btFabs((testDot - correctDot) / correctDot);
if (relativeError>1e6)
{
vlog( "Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
return 1;
} else
{
if (maxRelativeError < relativeError)
{
maxRelativeError = relativeError;
}
}
}
memcpy( biggest, old, 16 );
}
}
if (maxRelativeError)
{
printf("Warning: relative error = %e\n", maxRelativeError);
}
uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)];
uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)];
size_t j, k;
float *in = (float*) data;
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
scalarTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTimes[size] = bestTime;
else
scalarTimes[size] /= 100;
}
uint64_t *timep = &scalarTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
for( size = 1; size <= 32; size++ )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTimes[size] = 0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.minDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
vectorTimes[size] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTimes[size] = bestTime;
else
vectorTimes[size] /= 100;
}
timep = &vectorTimes[33];
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] =0;
for (j = 0; j < 100; j++) {
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
test += localScaling.minDot( (btVector3*) in, size, testDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
vlog( "Timing:\n" );
vlog( " size\t scalar\t vector\n" );
for( size = 1; size <= 32; size++ )
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT );
size_t index = 33;
for( size = 64; size <= MAX_SIZE; size *= 2 )
{
vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT );
index++;
}
// Useless check to make sure that the timing loops are not optimized away
if( test != correct )
vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test);
GuardFree(data);
return 0;
}
// Scalar reference for btVector3::minDot, used to validate the vectorized path.
//
// vertices  : `count` elements of four floats each; only the first three floats
//             of every element take part in the dot product.
// vec       : at least three floats (x, y, z) dotted against every vertex.
// count     : number of vertices to scan.
// dotResult : receives the smallest dot product found; stays at BT_INFINITY
//             when no dot product compared less than that.
//
// Returns the index of the first vertex holding the smallest dot product, or
// -1 when no vertex was selected (for example when count is 0).
static long mindot_ref( const btSimdFloat4 *vertices,
                        float *vec,
                        size_t count,
                        float *dotResult )
{
    const float *cursor = (const float*) vertices;
    float best = BT_INFINITY;
    long bestIndex = -1;

    for( long n = 0; n < count; n++, cursor += 4 )
    {
        const float candidate = vec[0] * cursor[0] + vec[1] * cursor[1] + vec[2] * cursor[2];
        if( !(candidate < best) )
            continue;   // ties and NaN never displace the current winner
        best = candidate;
        bestIndex = n;
    }

    *dotResult = best;
    return bestIndex;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_mindot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
// Include guard for the Test_mindot declaration.
#ifndef BulletTest_Test_mindot_h
#define BulletTest_Test_mindot_h
// Give the entry point C linkage when this header is included from C++.
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the minDot test. Takes no arguments and returns an int
// status: 0 when the run completes, 1 when a mismatch is detected.
int Test_mindot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,162 @@
//
// Test_qtdot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_qtdot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btQuaternion.h>
// Operation under test: the library's quaternion dot product.
#define BT_OP(a, b) (a.dot(b))

// reference code for testing purposes
static inline btScalar qtdot_ref(btQuaternion& q1, btQuaternion& q2);

// Four-component dot product of q1 and q2, summed in x, y, z, w order.
static inline btScalar qtdot_ref(btQuaternion& q1, btQuaternion& q2)
{
    const btScalar ax = q1.x(), ay = q1.y(), az = q1.z(), aw = q1.w();
    const btScalar bx = q2.x(), by = q2.y(), bz = q2.z(), bw = q2.w();

    return ax * bx + ay * by + az * bz + aw * bw;
}

// Number of quaternion pairs processed inside one timed sample.
#define LOOPCOUNT 1024
// Number of timed samples taken for each implementation.
#define NUM_CYCLES 1000
// Checks btQuaternion::dot (through BT_OP) against qtdot_ref on one pair of
// quaternions, then times both implementations over DATA_SIZE pairs.
// Returns 1 if the two results differ by more than FLT_EPSILON*4, otherwise 0
// after logging the timings.
// NOTE(review): RANDF_01 is defined elsewhere; presumably a random float in [0, 1] — confirm.
int Test_qtdot(void)
{
btQuaternion q1, q2;
float x, y, z, w, vNaN;
vNaN = BT_NAN; // w channel NaN
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q1.setValue(x,y,z,w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q2.setValue(x,y,z,w);
btScalar correct_res, test_res;
{
// Both results start as NaN before being overwritten by the two implementations.
correct_res = vNaN;
test_res = vNaN;
correct_res = qtdot_ref(q1, q2);
test_res = BT_OP(q1,q2);
if( fabsf(correct_res - test_res) > FLT_EPSILON*4 )
{
vlog( "Error - qtdot result error! "
"\ncorrect = %10.4f "
"\ntested = %10.4f \n",
correct_res, test_res);
return 1;
}
}
// Timing data: DATA_SIZE equals LOOPCOUNT, so each timed sample visits every pair once.
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arr1[DATA_SIZE];
btQuaternion qt_arr2[DATA_SIZE];
btScalar res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// The inputs are filled once and reused by both timing passes.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr2[k].setValue(x,y,z,w);
}
// Timing pass 1: scalar reference.
{
uint64_t startTime, bestTime, currentTime;
// -1LL converts to the largest uint64_t, so the first sample always becomes the best.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
// Four operations per iteration; the mask keeps km inside the arrays.
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t km = (k & (DATA_SIZE-1));
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report the best sample, or the average when gReportAverageTimes is non-zero.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Timing pass 2: library implementation, same loop shape as above.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t km = (k & (DATA_SIZE-1));
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);km++;
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the per-operation cost of both implementations.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_qtdot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
// Include guard for the Test_qtdot declaration.
#ifndef BulletTest_Test_qtdot_h
#define BulletTest_Test_qtdot_h
// Give the entry point C linkage when this header is included from C++.
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the quaternion dot-product test. Takes no arguments and
// returns an int status: 0 when the run completes, 1 on a result mismatch.
int Test_qtdot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,183 @@
//
// Test_qtmul.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_qtmul.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btQuaternion.h>
// Operation under test: in-place quaternion multiplication (a *= b).
#define BT_OP(a, b) ((a) *= (b))

// reference code for testing purposes
static inline btQuaternion& qtmul_ref(btQuaternion& q1, btQuaternion& q2);

// Scalar reference for q1 *= q2: computes the four product components from the
// current values of q1 and q2, stores them into q1 and returns q1.
static inline btQuaternion& qtmul_ref(btQuaternion& q1, btQuaternion& q2)
{
    const float px = q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y();
    const float py = q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z();
    const float pz = q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x();
    const float pw = q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z();

    // All four components are computed before q1 is overwritten.
    q1.setValue(px, py, pz, pw);
    return q1;
}

// Number of quaternion pairs processed inside one timed sample.
#define LOOPCOUNT 1024
// Number of timed samples taken for each implementation.
#define NUM_CYCLES 1000
// Checks btQuaternion's operator*= (through BT_OP) against qtmul_ref on one
// pair of quaternions, then times both implementations over DATA_SIZE pairs.
// Returns 1 if the summed absolute component difference exceeds
// FLT_EPSILON*10, otherwise 0 after logging the timings.
// NOTE(review): RANDF_01 is defined elsewhere; presumably a random float in [0, 1] — confirm.
int Test_qtmul(void)
{
btQuaternion q1, q2, q3;
float x, y, z, w, vNaN;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
vNaN = BT_NAN; // w channel NaN
q1.setValue(x,y,z,w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q2.setValue(x,y,z,w);
// q3 is a copy of q1: the reference mutates q1 and the tested operator mutates q3.
q3 = q1;
btQuaternion correct_res, test_res;
{
// This inner vNaN shadows the function-level vNaN declared above.
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN, vNaN);
correct_res = qtmul_ref(q1, q2);
test_res = BT_OP(q3,q2);
if( fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) > FLT_EPSILON*10 )
{
vlog( "Error - qtmul result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
return 1;
}
}
// Timing data: DATA_SIZE equals LOOPCOUNT, so each timed sample visits every pair once.
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arr1[DATA_SIZE];
btQuaternion qt_arr2[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Timing pass 1: scalar reference.
{
uint64_t startTime, bestTime, currentTime;
// -1LL converts to the largest uint64_t, so the first sample always becomes the best.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// The inputs are regenerated before every sample, outside the timed region.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr2[k].setValue(x,y,z,w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
qt_arr1[k] = qtmul_ref(qt_arr1[k], qt_arr2[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report the best sample, or the average when gReportAverageTimes is non-zero.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Timing pass 2: library implementation, same structure as above.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr2[k].setValue(x,y,z,w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
qt_arr1[k] = BT_OP(qt_arr1[k], qt_arr2[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the per-operation cost of both implementations.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_qtmul.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
// Include guard for the Test_qtmul declaration.
#ifndef BulletTest_Test_qtmul_h
#define BulletTest_Test_qtmul_h
// Give the entry point C linkage when this header is included from C++.
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the quaternion multiplication test. Takes no arguments and
// returns an int status: 0 when the run completes, 1 on a result mismatch.
int Test_qtmul(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,162 @@
//
// Test_qtmulQV3.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_qtmulQV3.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btQuaternion.h>
// Operation under test: quaternion * vector via the library's operator*.
#define BT_OP(a, b) ((a) * (b))

// reference code for testing purposes
static inline btQuaternion qtmulQV3_ref(const btQuaternion& q, const btVector3& w);

// Scalar reference for q * w, where only the x, y and z components of the
// vector w enter the product. Returns the result as a new quaternion.
static inline btQuaternion qtmulQV3_ref(const btQuaternion& q, const btVector3& w)
{
    const btScalar rx =  q.w() * w.x() + q.y() * w.z() - q.z() * w.y();
    const btScalar ry =  q.w() * w.y() + q.z() * w.x() - q.x() * w.z();
    const btScalar rz =  q.w() * w.z() + q.x() * w.y() - q.y() * w.x();
    const btScalar rw = -q.x() * w.x() - q.y() * w.y() - q.z() * w.z();

    return btQuaternion(rx, ry, rz, rw);
}

// Number of quaternion/vector pairs processed inside one timed sample.
#define LOOPCOUNT 1024
// Number of timed samples taken for each implementation.
#define NUM_CYCLES 1000
// Builds a btSimdFloat4 from three RANDF_m1p1 values followed by NaN; used to
// initialise btVector3 test inputs.
// NOTE(review): RANDF_m1p1 is defined elsewhere; presumably a random float in [-1, 1] — confirm.
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN ); // w channel NaN
}
// Builds a btSimdFloat4 from four RANDF_m1p1 values; used to initialise
// btQuaternion test inputs.
static inline btSimdFloat4 qtrand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, RANDF_m1p1 );
}
// Builds a btSimdFloat4 with NaN in all four arguments; used to poison result
// variables before they are computed.
static inline btSimdFloat4 qtNAN_f4(void)
{
return btAssign128( BT_NAN, BT_NAN, BT_NAN, BT_NAN );
}
// Checks the library's quaternion * vector product (through BT_OP) against
// qtmulQV3_ref on one input pair, then times both over DATA_SIZE pairs.
// Returns 1 if the summed absolute component difference exceeds
// FLT_EPSILON*8, otherwise 0 after logging the timings.
int Test_qtmulQV3(void)
{
btQuaternion q;
btVector3 v3;
// Init the data
q = btQuaternion(qtrand_f4());
v3 = btVector3(rand_f4());
btQuaternion correct_res, test_res;
// Both results start as all-NaN before being overwritten by the two implementations.
correct_res = btQuaternion(qtNAN_f4());
test_res = btQuaternion(qtNAN_f4());
{
correct_res = qtmulQV3_ref(q, v3);
test_res = BT_OP(q, v3);
if( fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) > FLT_EPSILON*8 )
{
vlog( "Error - qtmulQV3 result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
return 1;
}
}
// Timing data: DATA_SIZE equals LOOPCOUNT, so each timed sample visits every pair once.
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arrR[DATA_SIZE];
btQuaternion qt_arr[DATA_SIZE];
btVector3 v3_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Timing pass 1: scalar reference.
{
uint64_t startTime, bestTime, currentTime;
// -1LL converts to the largest uint64_t, so the first sample always becomes the best.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// The inputs are regenerated before every sample, outside the timed region.
for( k = 0; k < DATA_SIZE; k++ )
{
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
qt_arrR[k] = qtmulQV3_ref(qt_arr[k], v3_arr[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report the best sample, or the average when gReportAverageTimes is non-zero.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Timing pass 2: library implementation, same structure as above.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
qt_arrR[k] = BT_OP(qt_arr[k], v3_arr[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the per-operation cost of both implementations.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_qtmulQV3.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
// Include guard for the Test_qtmulQV3 declaration.
#ifndef BulletTest_Test_qtmulQV3_h
#define BulletTest_Test_qtmulQV3_h
// Give the entry point C linkage when this header is included from C++.
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the quaternion * vector test. Takes no arguments and returns
// an int status: 0 when the run completes, 1 on a result mismatch.
int Test_qtmulQV3(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,161 @@
//
// Test_qtmulV3Q.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_qtmulV3Q.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btQuaternion.h>
// Operation under test: vector * quaternion via the library's operator*.
#define BT_OP(a, b) ((a) * (b))

// reference code for testing purposes
static inline btQuaternion qtmulV3Q_ref(const btVector3& w, const btQuaternion& q);

// Scalar reference for w * q, where only the x, y and z components of the
// vector w enter the product. Returns the result as a new quaternion.
static inline btQuaternion qtmulV3Q_ref(const btVector3& w, const btQuaternion& q)
{
    const btScalar rx = +w.x() * q.w() + w.y() * q.z() - w.z() * q.y();
    const btScalar ry = +w.y() * q.w() + w.z() * q.x() - w.x() * q.z();
    const btScalar rz = +w.z() * q.w() + w.x() * q.y() - w.y() * q.x();
    const btScalar rw = -w.x() * q.x() - w.y() * q.y() - w.z() * q.z();

    return btQuaternion(rx, ry, rz, rw);
}

// Number of vector/quaternion pairs processed inside one timed sample.
#define LOOPCOUNT 1024
// Number of timed samples taken for each implementation.
#define NUM_CYCLES 1000
// Builds a btSimdFloat4 from three RANDF_m1p1 values followed by NaN; used to
// initialise btVector3 test inputs.
// NOTE(review): RANDF_m1p1 is defined elsewhere; presumably a random float in [-1, 1] — confirm.
static inline btSimdFloat4 rand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, BT_NAN ); // w channel NaN
}
// Builds a btSimdFloat4 from four RANDF_m1p1 values; used to initialise
// btQuaternion test inputs.
static inline btSimdFloat4 qtrand_f4(void)
{
return btAssign128( RANDF_m1p1, RANDF_m1p1, RANDF_m1p1, RANDF_m1p1 );
}
// Builds a btSimdFloat4 with NaN in all four arguments; used to poison result
// variables before they are computed.
static inline btSimdFloat4 qtNAN_f4(void)
{
return btAssign128( BT_NAN, BT_NAN, BT_NAN, BT_NAN );
}
// Checks the library's vector * quaternion product (through BT_OP) against
// qtmulV3Q_ref on one input pair, then times both over DATA_SIZE pairs.
// Returns 1 if the summed absolute component difference exceeds
// FLT_EPSILON*8, otherwise 0 after logging the timings.
int Test_qtmulV3Q(void)
{
btQuaternion q;
btVector3 v3;
// Init the data
q = btQuaternion(qtrand_f4());
v3 = btVector3(rand_f4());
btQuaternion correct_res, test_res;
// Both results start as all-NaN before being overwritten by the two implementations.
correct_res = btQuaternion(qtNAN_f4());
test_res = btQuaternion(qtNAN_f4());
{
correct_res = qtmulV3Q_ref(v3, q);
test_res = BT_OP(v3, q);
if( fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) > FLT_EPSILON*8 )
{
vlog( "Error - qtmulV3Q result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
return 1;
}
}
// Timing data: DATA_SIZE equals LOOPCOUNT, so each timed sample visits every pair once.
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arrR[DATA_SIZE];
btQuaternion qt_arr[DATA_SIZE];
btVector3 v3_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Timing pass 1: scalar reference.
{
uint64_t startTime, bestTime, currentTime;
// -1LL converts to the largest uint64_t, so the first sample always becomes the best.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// The inputs are regenerated before every sample, outside the timed region.
for( k = 0; k < DATA_SIZE; k++ )
{
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
qt_arrR[k] = qtmulV3Q_ref(v3_arr[k], qt_arr[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report the best sample, or the average when gReportAverageTimes is non-zero.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Timing pass 2: library implementation, same structure as above.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
qt_arr[k] = btQuaternion(qtrand_f4());
v3_arr[k] = btVector3(rand_f4());
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
qt_arrR[k] = BT_OP(v3_arr[k], qt_arr[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the per-operation cost of both implementations.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
#endif//#ifdef BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_qtmulV3Q.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
// Include guard for the Test_qtmulV3Q declaration.
#ifndef BulletTest_Test_qtmulV3Q_h
#define BulletTest_Test_qtmulV3Q_h
// Give the entry point C linkage when this header is included from C++.
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the vector * quaternion test. Takes no arguments and returns
// an int status: 0 when the run completes, 1 on a result mismatch.
int Test_qtmulV3Q(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,176 @@
//
// Test_qtnorm.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_qtnorm.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btQuaternion.h>
// Operation under test: the library's quaternion normalize().
#define BT_OP(a) (a.normalize())

// reference code for testing purposes
static inline btQuaternion& qtnorm_ref(btQuaternion& q1);

// Scalar reference for normalization: scales all four components of q1 by
// 1 / sqrt(x*x + y*y + z*z + w*w), stores them back into q1 and returns q1.
// No special handling exists for a zero-length input.
static inline btQuaternion& qtnorm_ref(btQuaternion& q1)
{
    const float lengthSquared =
        q1.x() * q1.x() +
        q1.y() * q1.y() +
        q1.z() * q1.z() +
        q1.w() * q1.w();
    const float invLength = 1.0f / sqrtf(lengthSquared);

    q1.setValue(q1.x()*invLength, q1.y()*invLength, q1.z()*invLength, q1.w()*invLength);
    return q1;
}

// Number of quaternions processed inside one timed sample, and the number of
// random inputs checked for correctness.
#define LOOPCOUNT 1024
// Number of timed samples taken for each implementation.
#define NUM_CYCLES 1000
// Checks btQuaternion::normalize (through BT_OP) against qtnorm_ref on
// LOOPCOUNT inputs, then times both implementations over DATA_SIZE quaternions.
// Returns 1 on the first input whose summed absolute component difference
// exceeds FLT_EPSILON*10, otherwise 0 after logging the timings.
// NOTE(review): RANDF_01 is defined elsewhere; presumably a random float in [0, 1] — confirm.
int Test_qtnorm(void)
{
int i;
btQuaternion q1, q2;
float x, y, z, w, vNaN;
vNaN = BT_NAN; // w channel NaN
btQuaternion correct_res, test_res;
for (i=0; i<LOOPCOUNT; i++)
{
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
q1.setValue(x,y,z,w);
// q2 is a copy of q1 so that each implementation receives the same input.
q2 = q1;
// Both results start as all-NaN before being overwritten by the two implementations.
correct_res.setValue(vNaN, vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN, vNaN);
correct_res = qtnorm_ref(q1);
test_res = BT_OP(q2);
if( fabsf(correct_res.x() - test_res.x()) +
fabsf(correct_res.y() - test_res.y()) +
fabsf(correct_res.z() - test_res.z()) +
fabsf(correct_res.w() - test_res.w()) > FLT_EPSILON*10 )
{
vlog( "Error - qtnorm result error! "
"\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
"\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) \n",
correct_res.x(), correct_res.y(),
correct_res.z(), correct_res.w(),
test_res.x(), test_res.y(),
test_res.z(), test_res.w());
return 1;
}
}
// Timing data: DATA_SIZE equals LOOPCOUNT, so each timed sample visits every element once.
#define DATA_SIZE LOOPCOUNT
btQuaternion qt_arr0[DATA_SIZE];
btQuaternion qt_arr1[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Timing pass 1: scalar reference.
{
uint64_t startTime, bestTime, currentTime;
// -1LL converts to the largest uint64_t, so the first sample always becomes the best.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// The inputs are regenerated before every sample, outside the timed region.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
}
startTime = ReadTicks();
// Four operations per iteration; the mask keeps km inside the arrays.
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t km = (k & (DATA_SIZE-1));
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);km++;
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);km++;
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);km++;
qt_arr0[km] = qtnorm_ref(qt_arr1[km]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report the best sample, or the average when gReportAverageTimes is non-zero.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Timing pass 2: library implementation, same structure as above.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = RANDF_01;
qt_arr1[k].setValue(x,y,z,w);
}
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t km = (k & (DATA_SIZE-1));
qt_arr0[km] = BT_OP(qt_arr1[km]);km++;
qt_arr0[km] = BT_OP(qt_arr1[km]);km++;
qt_arr0[km] = BT_OP(qt_arr1[km]);km++;
qt_arr0[km] = BT_OP(qt_arr1[km]);km++;
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the per-operation cost of both implementations.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_qtnorm.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
// Include guard for the Test_qtnorm declaration.
#ifndef BulletTest_Test_qtnorm_h
#define BulletTest_Test_qtnorm_h
// Give the entry point C linkage when this header is included from C++.
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the quaternion normalization test. Takes no arguments and
// returns an int status: 0 when the run completes, 1 on a result mismatch.
int Test_qtnorm(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,599 @@
//
// Test_quat_aos_neon.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc., Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_quat_aos_neon.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <vectormath/vmInclude.h>
// The typedefs below are disabled. vmVector3 and vmMatrix3 are still used by
// Quat_ref, so they are presumably supplied by <vectormath/vmInclude.h> — TODO confirm.
//typedef Vectormath::Aos::Vector3 vmVector3;
//typedef Vectormath::Aos::Quat vmQuat;
//typedef Vectormath::Aos::Matrix3 vmMatrix3;
//typedef Vectormath::Aos::Transform3 vmTransform3;
//typedef Vectormath::Aos::Point3 vmPoint3;
// 4-D vector type accepted by Quat_ref's explicit conversion constructor.
typedef Vectormath::Aos::Vector4 vmVector4;
// reference code for testing purposes
// Quat_ref is a quaternion stored as four separate floats (mX, mY, mZ, mW).
// This class body only declares the interface; the member functions are
// defined out of line after the class.
// NOTE(review): ATTRIBUTE_ALIGNED16 is defined elsewhere; presumably it
// requests 16-byte alignment for the class — confirm.
ATTRIBUTE_ALIGNED16(class) Quat_ref
{
// Components in x, y, z, w order.
float mX;
float mY;
float mZ;
float mW;
public:
// Default constructor; does no initialization
//
inline Quat_ref( ) { };
// Copy a quaternion
//
inline Quat_ref( const Quat_ref & quat );
// Construct a quaternion from x, y, z, and w elements
//
inline Quat_ref( float x, float y, float z, float w );
// Construct a quaternion from a 3-D vector and a scalar
//
inline Quat_ref( const vmVector3 & xyz, float w );
// Copy elements from a 4-D vector into a quaternion
//
explicit inline Quat_ref( const vmVector4 & vec );
// Convert a rotation matrix to a unit-length quaternion
//
explicit inline Quat_ref( const vmMatrix3 & rotMat );
// Set all elements of a quaternion to the same scalar value
//
explicit inline Quat_ref( float scalar );
// Assign one quaternion to another
//
inline Quat_ref & operator =( const Quat_ref & quat );
// Set the x, y, and z elements of a quaternion
// NOTE:
// This function does not change the w element.
//
inline Quat_ref & setXYZ( const vmVector3 & vec );
// Get the x, y, and z elements of a quaternion
//
inline const vmVector3 getXYZ( ) const;
// Set the x element of a quaternion
//
inline Quat_ref & setX( float x );
// Set the y element of a quaternion
//
inline Quat_ref & setY( float y );
// Set the z element of a quaternion
//
inline Quat_ref & setZ( float z );
// Set the w element of a quaternion
//
inline Quat_ref & setW( float w );
// Get the x element of a quaternion
//
inline float getX( ) const;
// Get the y element of a quaternion
//
inline float getY( ) const;
// Get the z element of a quaternion
//
inline float getZ( ) const;
// Get the w element of a quaternion
//
inline float getW( ) const;
// Set an x, y, z, or w element of a quaternion by index
//
inline Quat_ref & setElem( int idx, float value );
// Get an x, y, z, or w element of a quaternion by index
//
inline float getElem( int idx ) const;
// Subscripting operator to set or get an element
//
inline float & operator []( int idx );
// Subscripting operator to get an element
//
inline float operator []( int idx ) const;
// Add two quaternions
//
inline const Quat_ref operator +( const Quat_ref & quat ) const;
// Subtract a quaternion from another quaternion
//
inline const Quat_ref operator -( const Quat_ref & quat ) const;
// Multiply two quaternions
//
inline const Quat_ref operator *( const Quat_ref & quat ) const;
// Multiply a quaternion by a scalar
//
inline const Quat_ref operator *( float scalar ) const;
// Divide a quaternion by a scalar
//
inline const Quat_ref operator /( float scalar ) const;
// Perform compound assignment and addition with a quaternion
//
inline Quat_ref & operator +=( const Quat_ref & quat );
// Perform compound assignment and subtraction by a quaternion
//
inline Quat_ref & operator -=( const Quat_ref & quat );
// Perform compound assignment and multiplication by a quaternion
//
inline Quat_ref & operator *=( const Quat_ref & quat );
// Perform compound assignment and multiplication by a scalar
//
inline Quat_ref & operator *=( float scalar );
// Perform compound assignment and division by a scalar
//
inline Quat_ref & operator /=( float scalar );
// Negate all elements of a quaternion
//
inline const Quat_ref operator -( ) const;
// Construct an identity quaternion
//
static inline const Quat_ref identity( );
// Construct a quaternion to rotate between two unit-length 3-D vectors
// NOTE:
// The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
//
static inline const Quat_ref rotation( const vmVector3 & unitVec0, const vmVector3 & unitVec1 );
// Construct a quaternion to rotate around a unit-length 3-D vector
//
static inline const Quat_ref rotation( float radians, const vmVector3 & unitVec );
// Construct a quaternion to rotate around the x axis
//
static inline const Quat_ref rotationX( float radians );
// Construct a quaternion to rotate around the y axis
//
static inline const Quat_ref rotationY( float radians );
// Construct a quaternion to rotate around the z axis
//
static inline const Quat_ref rotationZ( float radians );
};
// Copy constructor: duplicates the x, y, z and w elements of quat.
inline Quat_ref::Quat_ref( const Quat_ref & quat )
    : mX( quat.mX ), mY( quat.mY ), mZ( quat.mZ ), mW( quat.mW )
{
}
// Construct a quaternion from its four scalar elements, given in x, y, z, w order.
inline Quat_ref::Quat_ref( float _x, float _y, float _z, float _w )
    : mX( _x ), mY( _y ), mZ( _z ), mW( _w )
{
}
// Construct a quaternion whose x, y, z come from a 3-D vector and whose w is _w.
inline Quat_ref::Quat_ref( const vmVector3 & xyz, float _w )
{
    // Both setters return *this, so the two assignments can be chained.
    setXYZ( xyz ).setW( _w );
}
// Construct a quaternion by copying the x, y, z and w elements of a 4-D vector.
inline Quat_ref::Quat_ref( const vmVector4 & vec )
    : mX( vec.getX() ), mY( vec.getY() ), mZ( vec.getZ() ), mW( vec.getW() )
{
}
// Construct a quaternion with all four elements set to the same scalar value.
inline Quat_ref::Quat_ref( float scalar )
{
    mX = mY = mZ = mW = scalar;
}
// Return the identity quaternion (0, 0, 0, 1).
inline const Quat_ref Quat_ref::identity( )
{
    const Quat_ref unit( 0.0f, 0.0f, 0.0f, 1.0f );
    return unit;
}
// Load four consecutive floats from fptr into quat, in x, y, z, w order.
// All four values are read before quat is overwritten.
inline void loadXYZW_ref( Quat_ref & quat, const float * fptr )
{
    const float x = fptr[0];
    const float y = fptr[1];
    const float z = fptr[2];
    const float w = fptr[3];
    quat = Quat_ref( x, y, z, w );
}
// Store the x, y, z and w elements of quat into four consecutive floats at fptr.
inline void storeXYZW_ref( const Quat_ref & quat, float * fptr )
{
    float * out = fptr;
    *out++ = quat.getX();
    *out++ = quat.getY();
    *out++ = quat.getZ();
    *out   = quat.getW();
}
// Assign one quaternion to another by copying all four elements; returns *this.
inline Quat_ref & Quat_ref::operator =( const Quat_ref & quat )
{
    Quat_ref & self = *this;
    self.mX = quat.mX;
    self.mY = quat.mY;
    self.mZ = quat.mZ;
    self.mW = quat.mW;
    return self;
}
// Set the x, y and z elements from a 3-D vector; w is left unchanged. Returns *this.
inline Quat_ref & Quat_ref::setXYZ( const vmVector3 & vec )
{
    this->mX = vec.getX();
    this->mY = vec.getY();
    this->mZ = vec.getZ();
    return *this;
}
// Return the x, y and z elements packed into a 3-D vector.
inline const vmVector3 Quat_ref::getXYZ( ) const
{
    const vmVector3 xyz( mX, mY, mZ );
    return xyz;
}
// Set the x element; returns *this so calls can be chained.
inline Quat_ref & Quat_ref::setX( float _x )
{
    Quat_ref & self = *this;
    self.mX = _x;
    return self;
}
inline float Quat_ref::getX( ) const
{
return mX;
}
// Set the y element; returns *this so calls can be chained.
inline Quat_ref & Quat_ref::setY( float _y )
{
    Quat_ref & self = *this;
    self.mY = _y;
    return self;
}
inline float Quat_ref::getY( ) const
{
return mY;
}
// Set the z element; returns *this so calls can be chained.
inline Quat_ref & Quat_ref::setZ( float _z )
{
    Quat_ref & self = *this;
    self.mZ = _z;
    return self;
}
inline float Quat_ref::getZ( ) const
{
return mZ;
}
// Set the w element; returns *this so calls can be chained.
inline Quat_ref & Quat_ref::setW( float _w )
{
    Quat_ref & self = *this;
    self.mW = _w;
    return self;
}
inline float Quat_ref::getW( ) const
{
return mW;
}
// Set an element by index, addressed as an offset from mX; returns *this.
// NOTE(review): relies on mX, mY, mZ, mW being laid out contiguously so that
// idx 0..3 selects x, y, z, w - confirm against the class definition.
// idx is not range-checked.
inline Quat_ref & Quat_ref::setElem( int idx, float value )
{
    float * const base = &mX;
    base[idx] = value;
    return *this;
}
// Get an element by index, addressed as an offset from mX.
// NOTE(review): relies on mX, mY, mZ, mW being laid out contiguously - confirm
// against the class definition. idx is not range-checked.
inline float Quat_ref::getElem( int idx ) const
{
    const float * const base = &mX;
    return base[idx];
}
// Mutable subscript: returns a reference to the element at offset idx from mX.
// idx is not range-checked.
inline float & Quat_ref::operator []( int idx )
{
    float * const base = &mX;
    return base[idx];
}
// Read-only subscript: returns the value of the element at offset idx from mX.
// idx is not range-checked.
inline float Quat_ref::operator []( int idx ) const
{
    const float * const base = &mX;
    return base[idx];
}
// Element-wise sum of two quaternions.
inline const Quat_ref Quat_ref::operator +( const Quat_ref & quat ) const
{
    const float sumX = ( mX + quat.mX );
    const float sumY = ( mY + quat.mY );
    const float sumZ = ( mZ + quat.mZ );
    const float sumW = ( mW + quat.mW );
    return Quat_ref( sumX, sumY, sumZ, sumW );
}
// Element-wise difference: subtracts quat from this quaternion.
inline const Quat_ref Quat_ref::operator -( const Quat_ref & quat ) const
{
    const float diffX = ( mX - quat.mX );
    const float diffY = ( mY - quat.mY );
    const float diffZ = ( mZ - quat.mZ );
    const float diffW = ( mW - quat.mW );
    return Quat_ref( diffX, diffY, diffZ, diffW );
}
// Multiply every element of the quaternion by a scalar.
inline const Quat_ref Quat_ref::operator *( float scalar ) const
{
    const float scaledX = ( mX * scalar );
    const float scaledY = ( mY * scalar );
    const float scaledZ = ( mZ * scalar );
    const float scaledW = ( mW * scalar );
    return Quat_ref( scaledX, scaledY, scaledZ, scaledW );
}
// Add quat to this quaternion in place; returns *this.
inline Quat_ref & Quat_ref::operator +=( const Quat_ref & quat )
{
    // operator= returns *this, so the assignment result can be returned directly.
    return ( *this = ( *this + quat ) );
}
// Subtract quat from this quaternion in place; returns *this.
inline Quat_ref & Quat_ref::operator -=( const Quat_ref & quat )
{
    return ( *this = ( *this - quat ) );
}
// Scale this quaternion by a scalar in place; returns *this.
inline Quat_ref & Quat_ref::operator *=( float scalar )
{
    return ( *this = ( *this * scalar ) );
}
// Divide every element of the quaternion by a scalar (no zero check is performed).
inline const Quat_ref Quat_ref::operator /( float scalar ) const
{
    const float quotX = ( mX / scalar );
    const float quotY = ( mY / scalar );
    const float quotZ = ( mZ / scalar );
    const float quotW = ( mW / scalar );
    return Quat_ref( quotX, quotY, quotZ, quotW );
}
// Divide this quaternion by a scalar in place; returns *this.
inline Quat_ref & Quat_ref::operator /=( float scalar )
{
    return ( *this = ( *this / scalar ) );
}
// Negate all four elements of the quaternion.
inline const Quat_ref Quat_ref::operator -( ) const
{
    const float negX = -mX;
    const float negY = -mY;
    const float negZ = -mZ;
    const float negW = -mW;
    return Quat_ref( negX, negY, negZ, negW );
}
// Scalar-on-the-left multiplication; forwards to Quat_ref::operator*(float).
inline const Quat_ref operator *( float scalar, const Quat_ref & quat )
{
    const Quat_ref scaled = quat * scalar;
    return scaled;
}
// Four-element dot product of two quaternions, accumulated in x, y, z, w order.
inline float dot( const Quat_ref & quat0, const Quat_ref & quat1 )
{
    float sum = ( quat0.getX() * quat1.getX() );
    sum += ( quat0.getY() * quat1.getY() );
    sum += ( quat0.getZ() * quat1.getZ() );
    sum += ( quat0.getW() * quat1.getW() );
    return sum;
}
// Linear interpolation between two quaternions: quat0 + (quat1 - quat0) * t.
// The result is not renormalized.
inline const Quat_ref lerp( float t, const Quat_ref & quat0, const Quat_ref & quat1 )
{
    const Quat_ref delta = ( quat1 - quat0 );
    return ( quat0 + ( delta * t ) );
}
// Spherical linear interpolation between two quaternions (named as unit-length
// by the parameters).
// If the dot product of the inputs is negative, unitQuat0 is negated first.
// When the (absolute) cosine is not below _VECTORMATH_SLERP_TOL the weights
// fall back to plain linear ones, which avoids dividing by a near-zero sine.
inline const Quat_ref slerp( float t, const Quat_ref & unitQuat0, const Quat_ref & unitQuat1 )
{
    const float rawCos = dot( unitQuat0, unitQuat1 );
    const bool flipStart = ( rawCos < 0.0f );
    const float cosAngle = flipStart ? -rawCos : rawCos;
    const Quat_ref start = flipStart ? ( -unitQuat0 ) : unitQuat0;

    // Default to linear weights; replaced below when the angle is large enough.
    float scale0 = ( 1.0f - t );
    float scale1 = t;
    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
        const float angle = acosf( cosAngle );
        const float recipSinAngle = ( 1.0f / sinf( angle ) );
        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
    }
    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
}
// Spherical quadrangle interpolation: slerps the outer pair (0, 3) and the
// inner pair (1, 2) at t, then slerps between those results at 2t(1 - t).
inline const Quat_ref squad( float t, const Quat_ref & unitQuat0, const Quat_ref & unitQuat1, const Quat_ref & unitQuat2, const Quat_ref & unitQuat3 )
{
    const Quat_ref outer = slerp( t, unitQuat0, unitQuat3 );
    const Quat_ref inner = slerp( t, unitQuat1, unitQuat2 );
    const float blend = ( ( 2.0f * t ) * ( 1.0f - t ) );
    return slerp( blend, outer, inner );
}
// Squared length of a quaternion: x*x + y*y + z*z + w*w (no square root is taken).
inline float norm( const Quat_ref & quat )
{
    float sumSq = ( quat.getX() * quat.getX() );
    sumSq += ( quat.getY() * quat.getY() );
    sumSq += ( quat.getZ() * quat.getZ() );
    sumSq += ( quat.getW() * quat.getW() );
    return sumSq;
}
// Length of a quaternion: the square root of norm( quat ).
inline float length( const Quat_ref & quat )
{
    const float lenSqr = norm( quat );
    return ::sqrtf( lenSqr );
}
// Return quat scaled by the reciprocal of its length.
// No guard is applied for a zero-length input.
inline const Quat_ref normalize( const Quat_ref & quat )
{
    const float lenSqr = norm( quat );
    const float lenInv = ( 1.0f / sqrtf( lenSqr ) );
    // Quat_ref::operator*(float) multiplies each element by lenInv.
    return ( quat * lenInv );
}
// Build the quaternion rotating unitVec0 onto unitVec1.
// xyz = cross(v0, v1) / (2 cos(h)) and w = cos(h), where 2 cos(h) is computed
// as sqrt(2 (1 + dot(v0, v1))). When the vectors are opposite that root is
// zero and the division below is by zero.
inline const Quat_ref Quat_ref::rotation( const vmVector3 & unitVec0, const vmVector3 & unitVec1 )
{
    const float cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) );
    const float recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 );
    const float halfCos = ( cosHalfAngleX2 * 0.5f );
    return Quat_ref( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), halfCos );
}
// Build the quaternion for a rotation of `radians` about the axis unitVec:
// xyz = unitVec * sin(radians / 2), w = cos(radians / 2).
inline const Quat_ref Quat_ref::rotation( float radians, const vmVector3 & unitVec )
{
    const float halfAngle = ( radians * 0.5f );
    const float sinHalf = sinf( halfAngle );
    const float cosHalf = cosf( halfAngle );
    return Quat_ref( ( unitVec * sinHalf ), cosHalf );
}
// Build the quaternion for a rotation about the x axis:
// (sin(radians / 2), 0, 0, cos(radians / 2)).
inline const Quat_ref Quat_ref::rotationX( float radians )
{
    const float halfAngle = ( radians * 0.5f );
    const float sinHalf = sinf( halfAngle );
    const float cosHalf = cosf( halfAngle );
    return Quat_ref( sinHalf, 0.0f, 0.0f, cosHalf );
}
// Build the quaternion for a rotation about the y axis:
// (0, sin(radians / 2), 0, cos(radians / 2)).
inline const Quat_ref Quat_ref::rotationY( float radians )
{
    const float halfAngle = ( radians * 0.5f );
    const float sinHalf = sinf( halfAngle );
    const float cosHalf = cosf( halfAngle );
    return Quat_ref( 0.0f, sinHalf, 0.0f, cosHalf );
}
// Build the quaternion for a rotation about the z axis:
// (0, 0, sin(radians / 2), cos(radians / 2)).
inline const Quat_ref Quat_ref::rotationZ( float radians )
{
    const float halfAngle = ( radians * 0.5f );
    const float sinHalf = sinf( halfAngle );
    const float cosHalf = cosf( halfAngle );
    return Quat_ref( 0.0f, 0.0f, sinHalf, cosHalf );
}
// Quaternion (Hamilton) product with *this on the left and quat on the right.
// Each output element is written as one fully parenthesized expression:
//   x = w1*x2 + x1*w2 + y1*z2 - z1*y2
//   y = w1*y2 + y1*w2 + z1*x2 - x1*z2
//   z = w1*z2 + z1*w2 + x1*y2 - y1*x2
//   w = w1*w2 - x1*x2 - y1*y2 - z1*z2
// The product is not commutative, so operand order matters.
inline const Quat_ref Quat_ref::operator *( const Quat_ref & quat ) const
{
return Quat_ref(
( ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY ) ),
( ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ ) ),
( ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX ) ),
( ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ ) )
);
}
// Multiply this quaternion by quat on the right, in place; returns *this.
inline Quat_ref & Quat_ref::operator *=( const Quat_ref & quat )
{
    return ( *this = ( *this * quat ) );
}
// Rotate a 3-D vector by a quaternion, i.e. evaluate quat * (vec, 0) * conj(quat)
// and return the x, y, z part.
// Step 1 forms tmp = quat * (vec, 0). tmpW holds the dot product of quat's
// xyz with vec, which is the negated w element of that product; the sign is
// folded into step 2.
// Step 2 multiplies tmp by the conjugate of quat, element by element.
inline const vmVector3 rotate( const Quat_ref & quat, const vmVector3 & vec )
{
float tmpX, tmpY, tmpZ, tmpW;
// tmp = quat * (vec, 0)
tmpX = ( ( ( quat.getW() * vec.getX() ) + ( quat.getY() * vec.getZ() ) ) - ( quat.getZ() * vec.getY() ) );
tmpY = ( ( ( quat.getW() * vec.getY() ) + ( quat.getZ() * vec.getX() ) ) - ( quat.getX() * vec.getZ() ) );
tmpZ = ( ( ( quat.getW() * vec.getZ() ) + ( quat.getX() * vec.getY() ) ) - ( quat.getY() * vec.getX() ) );
tmpW = ( ( ( quat.getX() * vec.getX() ) + ( quat.getY() * vec.getY() ) ) + ( quat.getZ() * vec.getZ() ) );
// xyz of tmp * conj(quat)
return vmVector3(
( ( ( ( tmpW * quat.getX() ) + ( tmpX * quat.getW() ) ) - ( tmpY * quat.getZ() ) ) + ( tmpZ * quat.getY() ) ),
( ( ( ( tmpW * quat.getY() ) + ( tmpY * quat.getW() ) ) - ( tmpZ * quat.getX() ) ) + ( tmpX * quat.getZ() ) ),
( ( ( ( tmpW * quat.getZ() ) + ( tmpZ * quat.getW() ) ) - ( tmpX * quat.getY() ) ) + ( tmpY * quat.getX() ) )
);
}
// Conjugate of a quaternion: negates x, y and z and keeps w.
inline const Quat_ref conj( const Quat_ref & quat )
{
    const float negX = -quat.getX();
    const float negY = -quat.getY();
    const float negZ = -quat.getZ();
    return Quat_ref( negX, negY, negZ, quat.getW() );
}
// Return a copy of quat1 when select1 is true, otherwise a copy of quat0.
inline const Quat_ref select( const Quat_ref & quat0, const Quat_ref & quat1, bool select1 )
{
    const Quat_ref & chosen = select1 ? quat1 : quat0;
    return Quat_ref( chosen.getX(), chosen.getY(), chosen.getZ(), chosen.getW() );
}
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
#define DATA_SIZE 1024
// Test entry point. Currently a stub: it runs no checks and always reports
// success by returning 0.
int Test_quat_aos_neon(void)
{
    const int status = 0;
    return status;
}
#endif

View File

@@ -0,0 +1,21 @@
//
// Test_quat_aos_neon.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_quat_aos_neon_h
#define BulletTest_Test_quat_aos_neon_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_quat_aos_neon(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,181 @@
//
// Test_v3cross.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3cross.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static btVector3& v3cross_ref(btVector3& v1, btVector3& v2);
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
// Correctness check and micro-benchmark for btVector3::cross.
//
// 1. Builds two random vectors (w set to NaN), computes the cross product with
//    the scalar reference v3cross_ref and with btVector3::cross, and compares
//    the x, y, z results. If the summed absolute difference exceeds
//    FLT_EPSILON * 4 the mismatch is logged and 1 is returned.
// 2. Times LOOPCOUNT reference calls and LOOPCOUNT btVector3::cross calls,
//    NUM_CYCLES times each, on freshly randomized data, and logs the
//    per-element result.
//
// Returns 0 when the correctness check passes.
int Test_v3cross(void)
{
btVector3 v1, v2, v3;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
v2.setW(w);
// v3cross_ref overwrites its first argument, so the tested call works on a copy.
v3 = v1;
btVector3 correct_res, test_res;
{
float vNaN = BT_NAN;
// Pre-fill both results with NaN before they are assigned below.
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3cross_ref(v1, v2);
test_res = v3.cross(v2);
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3cross result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Scalar (reference) timing.
{
uint64_t startTime, bestTime, currentTime;
// -1LL converts to the largest uint64_t, so the first measurement always wins.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// Refill the inputs outside the timed region.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr1[k] = v3cross_ref(vec3_arr1[k], vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report the fastest cycle, or the mean over all cycles when gReportAverageTimes is non-zero.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// btVector3::cross timing; same structure as the scalar block above.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr1[k] = vec3_arr1[k].cross(vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the TicksToCycles() value divided by LOOPCOUNT for each variant.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
// Scalar reference cross product. Computes v1 x v2 from the x, y, z elements,
// writes the result back into v1 (w is left untouched) and returns v1.
// All three components are computed before v1 is overwritten.
static btVector3& v3cross_ref(btVector3& v1, btVector3& v2)
{
    const btScalar crossX = v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1];
    const btScalar crossY = v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2];
    const btScalar crossZ = v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0];

    btScalar *dst = v1.m_floats;
    dst[0] = crossX;
    dst[1] = crossY;
    dst[2] = crossZ;
    return v1;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3cross.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3cross_h
#define BulletTest_Test_v3cross_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3cross(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,178 @@
//
// Test_v3div.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3div.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
#define BT_OP(a, b) ((a) / (b))
// reference code for testing purposes
// Scalar reference for component-wise division.
// Stores BT_OP(v1, v2) for the x, y and z elements into v0 and returns v0.
// The fourth (w) element of v0 is not written.
//
// The earlier two-parameter `static inline` forward declaration is removed:
// it did not match this three-parameter definition and so declared an
// overload that was never defined. The comma operators that chained the three
// assignments are replaced with ordinary statement terminators.
static btVector3& v3div_ref(btVector3& v0, btVector3& v1, btVector3& v2)
{
    v0.m_floats[0] = BT_OP(v1.m_floats[0] , v2.m_floats[0]);
    v0.m_floats[1] = BT_OP(v1.m_floats[1] , v2.m_floats[1]);
    v0.m_floats[2] = BT_OP(v1.m_floats[2] , v2.m_floats[2]);
    return v0;
}
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
// Correctness check and micro-benchmark for btVector3 component-wise division
// (BT_OP expands to operator/).
//
// 1. Divides two random vectors (w set to NaN) with the scalar reference
//    v3div_ref and with BT_OP on btVector3, then compares x, y, z. If the
//    summed absolute difference exceeds FLT_EPSILON*10 the mismatch is logged
//    and 1 is returned.
// 2. Times LOOPCOUNT reference calls and LOOPCOUNT BT_OP calls, NUM_CYCLES
//    times each, on freshly randomized data, and logs the per-element result.
//
// Returns 0 when the correctness check passes.
int Test_v3div(void)
{
btVector3 v1, v2, v3;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
v2.setW(w);
v3 = v1;
btVector3 correct_res, test_res;
{
float vNaN = BT_NAN;
// Pre-fill both results with NaN before they are assigned below.
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3div_ref(correct_res, v1, v2);
test_res = BT_OP(v3,v2);
if( fabsf(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabsf(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabsf(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON*10 )
{
vlog( "Error - v3div result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr0[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Scalar (reference) timing.
{
uint64_t startTime, bestTime, currentTime;
w = BT_NAN; // w channel NaN
// -1LL converts to the largest uint64_t, so the first measurement always wins.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// Refill the inputs outside the timed region.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr0[k] = v3div_ref(vec3_arr0[k], vec3_arr1[k], vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report the fastest cycle, or the mean over all cycles when gReportAverageTimes is non-zero.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// btVector3 division timing; same structure as the scalar block above.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr0[k] = BT_OP(vec3_arr1[k] , vec3_arr2[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the TicksToCycles() value divided by LOOPCOUNT for each variant.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3div.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3div_h
#define BulletTest_Test_v3div_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3div(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,164 @@
//
// Test_v3dot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3dot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btScalar v3dot_ref(
const btVector3& v1,
const btVector3& v2);
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
// Correctness check and micro-benchmark for btVector3::dot.
//
// 1. Computes the dot product of two random vectors (w set to NaN) with the
//    scalar reference v3dot_ref and with btVector3::dot. If the results differ
//    by more than FLT_EPSILON * 4 the mismatch is logged and 1 is returned.
// 2. Fills the input arrays once, then times the reference and btVector3::dot
//    loops (unrolled four at a time) NUM_CYCLES times each and logs the
//    per-element result.
//
// Returns 0 when the correctness check passes.
int Test_v3dot(void)
{
btVector3 v1, v2;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
v2.setW(w);
float correctDot0, testDot0;
{
// Both results start as NaN (w) before being overwritten below.
correctDot0 = w;
testDot0 = w; ;
correctDot0 = v3dot_ref(v1, v2);
testDot0 = v1.dot(v2);
if( fabsf(correctDot0 - testDot0) > FLT_EPSILON * 4 )
{
vlog( "Error - v3dot result error! %f != %f \n", correctDot0, testDot0);
return 1;
}
}
#define DATA_SIZE 1024
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
btScalar res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Unlike the other tests in this suite, the inputs are generated once here
// and reused by every timing cycle.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
res_arr[k] = w;
}
// Scalar (reference) timing.
{
uint64_t startTime, bestTime, currentTime;
// -1LL converts to the largest uint64_t, so the first measurement always wins.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
// Mask the index into the array; DATA_SIZE is a power of two.
size_t k32 = (k & (DATA_SIZE-1));
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]); k32++;
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]); k32++;
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]); k32++;
res_arr[k32] = v3dot_ref( vec3_arr1[k32], vec3_arr2[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report the fastest cycle, or the mean over all cycles when gReportAverageTimes is non-zero.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// btVector3::dot timing; same structure as the scalar block above.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
size_t k32 = k & (DATA_SIZE -1);
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]); k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]); k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]); k32++;
res_arr[k32] = vec3_arr1[k32].dot(vec3_arr2[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the TicksToCycles() value divided by LOOPCOUNT for each variant.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
// Scalar reference dot product over the x, y and z elements; w is ignored.
static btScalar v3dot_ref(const btVector3& v1,
                          const btVector3& v2)
{
    const btScalar *a = v1.m_floats;
    const btScalar *b = v2.m_floats;
    return (a[0] * b[0] +
            a[1] * b[1] +
            a[2] * b[2]);
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3dot.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3dot_h
#define BulletTest_Test_v3dot_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3dot(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,195 @@
//
// Test_v3interp.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3interp.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3& v3interp_ref(
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt);
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
// Correctness check and micro-benchmark for btVector3::setInterpolate3.
//
// 1. For rt = 0.0, 0.1, ..., 1.0 builds two random vectors (w set to NaN),
//    interpolates them with the scalar reference v3interp_ref and with
//    btVector3::setInterpolate3, and compares x, y, z. If the summed absolute
//    difference exceeds FLT_EPSILON * 4 the mismatch is logged and 1 is
//    returned.
// 2. Times LOOPCOUNT reference calls and LOOPCOUNT setInterpolate3 calls,
//    NUM_CYCLES times each, on freshly randomized data, and logs the
//    per-element result.
//
// Returns 0 when every correctness check passes.
int Test_v3interp(void)
{
    btVector3 v1, v2;
    btScalar rt;
    float x,y,z,w;

    float vNaN = BT_NAN;
    w = BT_NAN; // w channel NaN

    btVector3 correct_res, test_res;

    // Drive the loop with an integer counter. Repeatedly adding 0.1f drifts:
    // after ten additions the sum is slightly above 1.0, so a loop of the form
    // `rt <= 1.0f; rt += 0.1f` stops early and never tests the rt == 1 endpoint.
    for (int step = 0; step <= 10; step++)
    {
        rt = btScalar(step) / btScalar(10);

        // Pre-fill both results with NaN before they are assigned below.
        correct_res.setValue(vNaN, vNaN, vNaN);
        test_res.setValue(vNaN, vNaN, vNaN);

        // Init the data
        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v1.setValue(x,y,z);
        v1.setW(w);

        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v2.setValue(x,y,z);
        v2.setW(w);

        correct_res = v3interp_ref(correct_res, v1, v2, rt);
        test_res.setInterpolate3(v1, v2, rt);

        if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
            fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
            fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
        {
            vlog( "Error - v3interp result error! "
                  "\ncorrect = (%10.4f, %10.4f, %10.4f) "
                  "\ntested = (%10.4f, %10.4f, %10.4f) \n"
                  "\n rt=%10.4f",
                  correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
                  test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2], rt);
            return 1;
        }
    }

#define DATA_SIZE LOOPCOUNT
    btVector3 vec3_arr1[DATA_SIZE];
    btVector3 vec3_arr2[DATA_SIZE];
    btScalar rt_arr[DATA_SIZE];
    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j, k;

    // Scalar (reference) timing.
    {
        uint64_t startTime, bestTime, currentTime;
        // -1LL converts to the largest uint64_t, so the first measurement always wins.
        bestTime = -1LL;
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            // Refill the inputs outside the timed region.
            for( k = 0; k < DATA_SIZE; k++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr1[k].setValue(x,y,z);
                vec3_arr1[k].setW(w);

                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr2[k].setValue(x,y,z);
                vec3_arr2[k].setW(w);

                rt_arr[k] = RANDF_01;
            }

            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
            {
                v3interp_ref(vec3_arr1[k], vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
            }
            currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        // Report the fastest cycle, or the mean over all cycles when gReportAverageTimes is non-zero.
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;
        else
            scalarTime /= NUM_CYCLES;
    }

    // btVector3::setInterpolate3 timing; same structure as the scalar block above.
    {
        uint64_t startTime, bestTime, currentTime;
        bestTime = -1LL;
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            for( k = 0; k < DATA_SIZE; k++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr1[k].setValue(x,y,z);
                vec3_arr1[k].setW(w);

                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr2[k].setValue(x,y,z);
                vec3_arr2[k].setW(w);

                rt_arr[k] = RANDF_01;
            }

            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
            {
                vec3_arr1[k].setInterpolate3(vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
            }
            currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;
        else
            vectorTime /= NUM_CYCLES;
    }

    // Log the TicksToCycles() value divided by LOOPCOUNT for each variant.
    vlog( "Timing:\n" );
    vlog( " \t scalar\t vector\n" );
    vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
          TicksToCycles( vectorTime ) / LOOPCOUNT );

    return 0;
}
// Scalar reference interpolation: writes (1 - rt) * v0 + rt * v1 into the
// x, y and z elements of vr (w is left untouched) and returns vr.
// Components are written one at a time, in x, y, z order.
static btVector3&
v3interp_ref(
    btVector3& vr,
    btVector3& v0,
    btVector3& v1,
    btScalar& rt)
{
    btScalar s = btScalar(1.0) - rt;
    for (int axis = 0; axis < 3; ++axis)
    {
        vr.m_floats[axis] = s * v0.m_floats[axis] + rt * v1.m_floats[axis];
    }
    return vr;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3interp.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3interp_h
#define BulletTest_Test_v3interp_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3interp(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,198 @@
//
// Test_v3lerp.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3lerp.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3&
v3lerp_ref(
btVector3& vr,
btVector3& v0,
btVector3& v1,
btScalar& rt);
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
int Test_v3lerp(void)
{
btVector3 v1, v2;
btScalar rt;
float x,y,z,w;
float vNaN =BT_NAN;
w =BT_NAN; // w channel NaN
btVector3 correct_res, test_res;
for (rt = 0.0f; rt <= 1.0f; rt += 0.1f)
{
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v1.setValue(x,y,z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
v2.setW(w);
correct_res = v3lerp_ref(correct_res, v1, v2, rt);
test_res = v1.lerp(v2, rt);
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3lerp result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n"
"\n rt=%10.4f",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2], rt);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
btScalar rt_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
w =BT_NAN; // w channel NaN
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
rt_arr[k] = RANDF_01;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
v3lerp_ref(vec3_arr1[k], vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr2[k].setValue(x,y,z);
vec3_arr2[k].setW(w);
rt_arr[k] = RANDF_01;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr1[k] = vec3_arr1[k].lerp(vec3_arr2[k], rt_arr[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
// Scalar reference lerp: writes v0 + rt * (v1 - v0) into the x, y and z
// elements of vr (w is left untouched) and returns vr.
// Components are written one at a time, in x, y, z order.
static
btVector3&
v3lerp_ref(
    btVector3& vr,
    btVector3& v0,
    btVector3& v1,
    btScalar& rt)
{
    for (int axis = 0; axis < 3; ++axis)
    {
        vr.m_floats[axis] = v0.m_floats[axis] + rt * (v1.m_floats[axis] - v0.m_floats[axis]);
    }
    return vr;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3lerp.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3lerp_h
#define BulletTest_Test_v3lerp_h
#ifdef __cplusplus
extern "C" {
#endif
int Test_v3lerp(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,170 @@
//
// Test_v3norm.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3norm.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline btVector3& v3norm_ref(btVector3& v);
#define LOOPCOUNT 1024
#define NUM_CYCLES 1000
// Correctness check and micro-benchmark for btVector3::normalize.
//
// 1. Normalizes one random vector (w set to NaN) with the scalar reference
//    v3norm_ref and a copy of it with btVector3::normalize, then compares
//    x, y, z. If the summed absolute difference exceeds FLT_EPSILON * 4 the
//    mismatch is logged and 1 is returned.
// 2. Times the reference and btVector3::normalize loops (unrolled four at a
//    time), NUM_CYCLES times each, on freshly randomized data, and logs the
//    per-element result.
//
// Returns 0 when the correctness check passes.
int Test_v3norm(void)
{
btVector3 v1, v2;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
v1.setW(w);
// v3norm_ref normalizes its argument in place, so the tested call works on a copy.
v2 = v1;
btVector3 correct_res, test_res;
{
float vNaN = BT_NAN;
// Pre-fill both results with NaN before they are assigned below.
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3norm_ref(v1);
test_res = v2.normalize();
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3norm result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr0[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Scalar (reference) timing.
{
uint64_t startTime, bestTime, currentTime;
// -1LL converts to the largest uint64_t, so the first measurement always wins.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// Refill the inputs outside the timed region.
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
vec3_arr0[k] = v3norm_ref(vec3_arr1[k]);
vec3_arr0[k+1] = v3norm_ref(vec3_arr1[k+1]);
vec3_arr0[k+2] = v3norm_ref(vec3_arr1[k+2]);
vec3_arr0[k+3] = v3norm_ref(vec3_arr1[k+3]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report the fastest cycle, or the mean over all cycles when gReportAverageTimes is non-zero.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// btVector3::normalize timing; same structure as the scalar block above.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
}
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
vec3_arr0[k] = vec3_arr1[k].normalize();
vec3_arr0[k+1] = vec3_arr1[k+1].normalize();
vec3_arr0[k+2] = vec3_arr1[k+2].normalize();
vec3_arr0[k+3] = vec3_arr1[k+3].normalize();
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the TicksToCycles() value divided by LOOPCOUNT for each variant.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
// Scalar reference normalization: scales the x, y and z elements of v in
// place by 1 / sqrt(x*x + y*y + z*z) and returns v. The w element is left
// untouched, and a zero-length input is not guarded against.
static btVector3& v3norm_ref(btVector3& v)
{
    const float lenSqr = v.m_floats[0] * v.m_floats[0] +
                         v.m_floats[1] * v.m_floats[1] +
                         v.m_floats[2] * v.m_floats[2];
    const float invLen = 1.0f / sqrtf(lenSqr);
    for (int axis = 0; axis < 3; ++axis)
    {
        v.m_floats[axis] *= invLen;
    }
    return v;
}
#endif // BT_USE_SSE_IN_API || BT_USE_NEON

View File

@@ -0,0 +1,22 @@
//
// Test_v3norm.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3norm_h
#define BulletTest_Test_v3norm_h
#ifdef __cplusplus
extern "C" {
#endif
// Entry point of the v3norm test. Returns an int status; the success path
// returns 0 after logging scalar and vector timings.
int Test_v3norm(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,194 @@
//
// Test_v3rotate.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3rotate.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static inline
btVector3& v3rotate_ref(
btVector3& v0,
btVector3& v1,
const btScalar& s);
#define LOOPCOUNT 2048
#define NUM_CYCLES 1000
int Test_v3rotate(void)
{
btVector3 v1, v2;
float s;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v1.setValue(x,y,z);
v1.setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v2.setValue(x,y,z);
v2.setW(w);
s = RANDF_01 * (float) SIMD_PI;
btVector3 correct_res, test_res;
{
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
test_res = v1.rotate(v2, s);
correct_res = v3rotate_ref(v1, v2, s);
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3rotate result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr0[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
btScalar s_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr0[k].setValue(x,y,z);
vec3_arr0[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
s_arr[k] = RANDF_01 * (float)SIMD_PI;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr0[k] = v3rotate_ref(vec3_arr0[k], vec3_arr1[k], s_arr[k]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr0[k].setValue(x,y,z);
vec3_arr0[k].setW(w);
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr1[k].setValue(x,y,z);
vec3_arr1[k].setW(w);
s_arr[k] = RANDF_01 * (float)SIMD_PI;
}
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
vec3_arr0[k ] = vec3_arr0[k ].rotate(vec3_arr1[k ], s_arr[k]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
// Reference rotation that Test_v3rotate compares btVector3::rotate() against.
// Splits v0 into its projection onto wAxis and the remainder, then recombines
// them as  projection + remainder * cos(angle) + (wAxis x v0) * sin(angle).
// The result is stored back into v0, which is also the return value.
// wAxis is used as given; it is not normalized here.
static inline
btVector3&
v3rotate_ref(
    btVector3& v0,
    btVector3& wAxis,
    const btScalar& _angle)
{
    const btVector3 alongAxis = wAxis * wAxis.dot( v0 );
    const btVector3 acrossAxis = v0 - alongAxis;
    const btVector3 sideways = wAxis.cross( v0 );

    v0 = alongAxis + acrossAxis * cosf( _angle ) + sideways * sinf( _angle );
    return v0;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_v3rotate.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3rotate_h
#define BulletTest_Test_v3rotate_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the v3rotate correctness check and benchmark.
// Returns 0 on success, 1 when the tested result differs from the reference.
int Test_v3rotate(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,181 @@
//
// Test_v3sdiv.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3sdiv.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
// Forward declaration; the definition is at the end of this file. Divides the
// x, y and z components of v by s in place and returns a reference to v.
static inline
btVector3& v3sdiv_ref(
btVector3& v,
const btScalar& s);
// Benchmark sizing: each timed pass performs LOOPCOUNT divisions, and the pass
// is repeated NUM_CYCLES times.
#define LOOPCOUNT 2048
#define NUM_CYCLES 1000
// Compares btVector3::operator/=(scalar) with v3sdiv_ref() on one random
// input, then times both implementations and logs the cost per operation.
// Returns 0 on success, or 1 if the summed absolute difference of the x, y and
// z components exceeds FLT_EPSILON * 4.
int Test_v3sdiv(void)
{
btVector3 v1, v2;
btScalar s;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
// v1 and v2 receive identical values: one is fed to the reference, the other
// to the operator under test, because both modify their operand in place.
v1.setValue(x,y,z);
v1.setW(w);
v2.setValue(x,y,z);
v2.setW(w);
s = (float) RANDF_16;
btVector3 correct_res, test_res;
{
float vNaN = BT_NAN;
correct_res.setValue(vNaN, vNaN, vNaN);
test_res.setValue(vNaN, vNaN, vNaN);
correct_res = v3sdiv_ref(v1, s);
test_res = (v2 /= s);
if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
{
vlog( "Error - v3sdiv result error! "
"\ncorrect = (%10.4f, %10.4f, %10.4f) "
"\ntested = (%10.4f, %10.4f, %10.4f) \n",
correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
return 1;
}
}
#define DATA_SIZE LOOPCOUNT
btVector3 vec3_arr[DATA_SIZE];
btScalar s_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Time the scalar reference implementation.
{
uint64_t startTime, bestTime, currentTime;
// Largest uint64_t, so the first measurement always replaces it.
bestTime = uint64_t(-1LL);
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
// Refill the inputs before every timed pass (the pass modifies vec3_arr in place).
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr[k].setValue(x,y,z);
vec3_arr[k].setW(w);
s_arr[k] = RANDF_01;
}
startTime = ReadTicks();
// Manually unrolled by four.
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
v3sdiv_ref( vec3_arr[k], s_arr[k]);
v3sdiv_ref( vec3_arr[k+1], s_arr[k+1]);
v3sdiv_ref( vec3_arr[k+2], s_arr[k+2]);
v3sdiv_ref( vec3_arr[k+3], s_arr[k+3]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report either the fastest pass or the mean over all passes.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Time btVector3::operator/= with the same procedure.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
vec3_arr[k].setValue(x,y,z);
vec3_arr[k].setW(w);
s_arr[k] = RANDF_01;
}
startTime = ReadTicks();
for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
{
vec3_arr[k] /= s_arr[k];
vec3_arr[k+1] /= s_arr[k+1];
vec3_arr[k+2] /= s_arr[k+2];
vec3_arr[k+3] /= s_arr[k+3];
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the converted time divided by the number of operations per pass.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
// Reference scalar division that Test_v3sdiv compares operator/= against.
// Computes 1/s once and multiplies the x, y and z components of v by it in
// place; the fourth component is not touched. Returns v.
static inline
btVector3&
v3sdiv_ref(
    btVector3& v,
    const btScalar& s)
{
    const btScalar inverse = btScalar(1.0) / s;

    for (int axis = 0; axis < 3; ++axis)
        v.m_floats[axis] *= inverse;

    return v;
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_v3sdiv.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3sdiv_h
#define BulletTest_Test_v3sdiv_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the v3sdiv correctness check and benchmark.
// Returns 0 on success, 1 when the tested result differs from the reference.
int Test_v3sdiv(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,197 @@
//
// Test_v3skew.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3skew.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
// Forward declaration; the definition is at the end of this file. Writes the
// three rows of the skew-symmetric matrix built from *v into *v1, *v2 and *v3.
static void
v3skew_ref(
const btVector3* v,
btVector3* v1,
btVector3* v2,
btVector3* v3);
// Benchmark sizing: each timed pass performs LOOPCOUNT calls, and the pass is
// repeated NUM_CYCLES times.
#define LOOPCOUNT 2048
#define NUM_CYCLES 10000
// Compares btVector3::getSkewSymmetricMatrix() with v3skew_ref() on one random
// input, then times both implementations and logs the cost per call.
// Returns 0 on success, or 1 as soon as one of the three output vectors does
// not compare equal (operator==) to the reference output.
int Test_v3skew(void)
{
btVector3 v, v1, v2, v3, vt1, vt2, vt3;
float x,y,z,w;
// Init the data
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
w = BT_NAN; // w channel NaN
v.setValue(x,y,z);
v.setW(w);
// All six output vectors start with NaN in every component.
v1.setValue(w,w,w);
v1.setW(w);
vt3 = vt2 = vt1 = v3 = v2 = v1;
{
v3skew_ref(&v, &v1, &v2, &v3);
v.getSkewSymmetricMatrix(&vt1, &vt2, &vt3);
/*
if( v1.m_floats[0] != vt1.m_floats[0] ||
v1.m_floats[1] != vt1.m_floats[1] ||
v1.m_floats[2] != vt1.m_floats[2] )
*/
if(!(v1 == vt1))
{
vlog( "Error - v3skew result error! "
"\ncorrect v1 = (%10.4f, %10.4f, %10.4f) "
"\ntested v1 = (%10.4f, %10.4f, %10.4f) \n",
v1.m_floats[0], v1.m_floats[1], v1.m_floats[2],
vt1.m_floats[0], vt1.m_floats[1], vt1.m_floats[2]);
return 1;
}
/*
if( v2.m_floats[0] != vt2.m_floats[0] ||
v2.m_floats[1] != vt2.m_floats[1] ||
v2.m_floats[2] != vt2.m_floats[2] )
*/
if(!(v2 == vt2))
{
vlog( "Error - v3skew result error! "
"\ncorrect v2 = (%10.4f, %10.4f, %10.4f) "
"\ntested v2 = (%10.4f, %10.4f, %10.4f) \n",
v2.m_floats[0], v2.m_floats[1], v2.m_floats[2],
vt2.m_floats[0], vt2.m_floats[1], vt2.m_floats[2]);
return 1;
}
/*
if( v3.m_floats[0] != vt3.m_floats[0] ||
v3.m_floats[1] != vt3.m_floats[1] ||
v3.m_floats[2] != vt3.m_floats[2] )
*/
if(!(v3 == vt3))
{
vlog( "Error - v3skew result error! "
"\ncorrect v3 = (%10.4f, %10.4f, %10.4f) "
"\ntested v3 = (%10.4f, %10.4f, %10.4f) \n",
v3.m_floats[0], v3.m_floats[1], v3.m_floats[2],
vt3.m_floats[0], vt3.m_floats[1], vt3.m_floats[2]);
return 1;
}
}
// The arrays are smaller than LOOPCOUNT; the timed loops wrap their index.
#define DATA_SIZE 256
btVector3 v3_arr0[DATA_SIZE];
btVector3 v3_arr1[DATA_SIZE];
btVector3 v3_arr2[DATA_SIZE];
btVector3 v3_arr3[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
// Inputs are filled once; unlike some other tests they are not regenerated
// between timed passes (the input array is never written by the timed code).
for( k = 0; k < DATA_SIZE; k++ )
{
x = RANDF_01;
y = RANDF_01;
z = RANDF_01;
v3_arr0[k].setValue(x,y,z);
v3_arr0[k].setW(w);
v3_arr1[k].setValue(w,w,w);
v3_arr1[k].setW(w);
v3_arr3[k] = v3_arr2[k] = v3_arr1[k];
}
// Time the scalar reference implementation.
{
uint64_t startTime, bestTime, currentTime;
// -1 converts to the largest uint64_t, so the first measurement always replaces it.
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
// Wrap k into [0, DATA_SIZE); the mask works because DATA_SIZE is a power of two.
size_t k32 = (k & (DATA_SIZE-1));
v3skew_ref( &v3_arr0[k32], &v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
// Report either the fastest pass or the mean over all passes.
if( 0 == gReportAverageTimes )
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
// Time btVector3::getSkewSymmetricMatrix() with the same procedure.
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for( k = 0; k < LOOPCOUNT; k++ )
{
size_t k32 = (k & (DATA_SIZE -1));
v3_arr0[k32].getSkewSymmetricMatrix(&v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if( currentTime < bestTime )
bestTime = currentTime;
}
if( 0 == gReportAverageTimes )
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
// Log the converted time divided by the number of calls per pass.
vlog( "Timing:\n" );
vlog( " \t scalar\t vector\n" );
vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
return 0;
}
static void
v3skew_ref(
const btVector3* v,
btVector3* v1,
btVector3* v2,
btVector3* v3)
{
v1->setValue(0. ,-v->z(),v->y());
v2->setValue(v->z() ,0. ,-v->x());
v3->setValue(-v->y(),v->x() ,0.);
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_v3skew.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3skew_h
#define BulletTest_Test_v3skew_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the v3skew correctness check and benchmark.
// Returns 0 on success, 1 when a tested output differs from the reference.
int Test_v3skew(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,180 @@
//
// Test_v3triple.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#include "Test_v3triple.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
// Forward declaration; the definition is at the end of this file. Returns the
// scalar triple product v . (v1 x v2) computed from the x, y, z components.
static btScalar
v3triple_ref(
const btVector3& v,
const btVector3& v1,
const btVector3& v2);
// Benchmark sizing: each timed pass computes LOOPCOUNT triple products, and
// the pass is repeated NUM_CYCLES times.
#define LOOPCOUNT 1024
#define NUM_CYCLES 10000
// Compares btVector3::triple() with v3triple_ref() on one random input, then
// times both implementations and logs the cost per operation.
// Returns 0 on success, or 1 if the two results differ by more than
// FLT_EPSILON * 4.
int Test_v3triple(void)
{
    btVector3 v1, v2, v3;

    float x,y,z,w;

    // Init the data
    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    w = BT_NAN; // w channel NaN
    v1.setValue(x,y,z);
    v1.setW(w);

    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    v2.setValue(x,y,z);
    v2.setW(w);

    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    v3.setValue(x,y,z);
    v3.setW(w);

    float correctTriple0, testTriple0;

    {
        correctTriple0 = w;
        testTriple0 = w;

        // The reference result is the "correct" value and the btVector3 method
        // is the value under test, so the log below labels them accurately.
        correctTriple0 = v3triple_ref(v1,v2,v3);
        testTriple0 = v1.triple(v2, v3);

        if( fabsf(correctTriple0 - testTriple0) > FLT_EPSILON * 4 )
        {
            vlog( "Error - v3triple result error! %f != %f \n", correctTriple0, testTriple0);

            return 1;
        }
    }

#define DATA_SIZE 1024

    btVector3 v3_arr1[DATA_SIZE];
    btVector3 v3_arr2[DATA_SIZE];
    btVector3 v3_arr3[DATA_SIZE];
    btScalar res_arr[DATA_SIZE];

    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j, k;

    // Inputs are filled once; the timed code only reads them.
    for( k = 0; k < DATA_SIZE; k++ )
    {
        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v3_arr1[k].setValue(x,y,z);
        v3_arr1[k].setW(w);

        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v3_arr2[k].setValue(x,y,z);
        v3_arr2[k].setW(w);

        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v3_arr3[k].setValue(x,y,z);
        v3_arr3[k].setW(w);
    }

    // Time the scalar reference implementation.
    {
        uint64_t startTime, bestTime, currentTime;

        // Largest uint64_t, so the first measurement always replaces it.
        bestTime = uint64_t(-1LL);
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            startTime = ReadTicks();
            // Manually unrolled by four.
            for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
            {
                // Wrap k into [0, DATA_SIZE); DATA_SIZE is a power of two.
                size_t k32 = (k & (DATA_SIZE-1));
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]);
            }
            currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        // Report either the fastest pass or the mean over all passes.
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;
        else
            scalarTime /= NUM_CYCLES;
    }

    // Time btVector3::triple() with the same procedure.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = uint64_t(-1LL);
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            startTime = ReadTicks();
            for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
            {
                size_t k32 = k & (DATA_SIZE -1);
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]);
            }
            currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;
        else
            vectorTime /= NUM_CYCLES;
    }

    // Log the converted time divided by the number of operations per pass.
    vlog( "Timing:\n" );
    vlog( " \t scalar\t vector\n" );
    vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );

    return 0;
}
// Reference scalar triple product  v . (v1 x v2), written out component by
// component from the x, y and z entries of m_floats. The fourth component of
// each vector is never read.
static btScalar
v3triple_ref(
    const btVector3& v,
    const btVector3& v1,
    const btVector3& v2)
{
    const btScalar* const a = v.m_floats;
    const btScalar* const b = v1.m_floats;
    const btScalar* const c = v2.m_floats;

    // Kept as a single expression so the arithmetic order matches the
    // component-wise formula exactly.
    return
        a[0] * (b[1] * c[2] - b[2] * c[1]) +
        a[1] * (b[2] * c[0] - b[0] * c[2]) +
        a[2] * (b[0] * c[1] - b[1] * c[0]);
}
#endif //BT_USE_SSE

View File

@@ -0,0 +1,22 @@
//
// Test_v3triple.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Test_v3triple_h
#define BulletTest_Test_v3triple_h
#ifdef __cplusplus
extern "C" {
#endif
// Runs the v3triple correctness check and benchmark.
// Returns 0 on success, 1 when the tested result differs from the reference.
int Test_v3triple(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,272 @@
//
// File.c
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include <stdio.h>
#ifdef __APPLE__
#include <mach/mach_time.h>
#include <sys/sysctl.h>
#include <sys/mman.h>
#include <errno.h>
#else
#include "LinearMath/btAlignedAllocator.h"
#endif //__APPLE__
#include <stdlib.h>
#include "Utils.h"
#pragma mark Timing
// When non-zero, the Apple TicksToCycles() reports nanoseconds instead of
// converting to CPU cycles. Toggled by the -s command line flag in main.
int gReportNanoseconds = 0;
#ifdef _WIN32
#include <intrin.h>
// Windows: returns the current value of the processor time-stamp counter.
uint64_t ReadTicks( void )
{
    const uint64_t timestamp = __rdtsc();
    return timestamp;
}
// Windows: ticks come from the time-stamp counter, so the tick delta is
// returned as-is, converted to double.
double TicksToCycles( uint64_t delta )
{
    const double cycles = (double) delta;
    return cycles;
}
// Windows: returns the tick delta converted to double.
// NOTE(review): no tick-to-second scaling is applied here, so despite the name
// the value is a raw tick count — confirm whether callers rely on that.
double TicksToSeconds( uint64_t delta )
{
    const double ticks = (double) delta;
    return ticks;
}
// Windows: allocates count * size bytes with 16-byte alignment through
// btAlignedAlloc. No guard pages are created on this platform; the object
// stride reported through objectStride (when non-NULL) is simply size.
// Release the buffer with GuardFree.
void *GuardCalloc( size_t count, size_t size, size_t *objectStride )
{
    if (objectStride)
    {
        *objectStride = size;
    }

    const size_t totalBytes = count * size;
    return (void*) btAlignedAlloc(totalBytes,16);
}
// Windows: releases a buffer obtained from GuardCalloc via btAlignedFree.
void GuardFree( void *buf )
{
    void *const allocation = buf;
    btAlignedFree(allocation);
}
#endif
#ifdef __APPLE__
// Apple: returns the current mach_absolute_time() value.
uint64_t ReadTicks( void )
{
    const uint64_t now = mach_absolute_time();
    return now;
}
double TicksToCycles( uint64_t delta )
{
static long double conversion = 0.0L;
if( 0.0L == conversion )
{
// attempt to get conversion to nanoseconds
mach_timebase_info_data_t info;
int err = mach_timebase_info( &info );
if( err )
return __builtin_nanf("");
conversion = (long double) info.numer / info.denom;
// attempt to get conversion to cycles
if( 0 == gReportNanoseconds )
{
uint64_t frequency = 0;
size_t freq_size = sizeof( frequency );
err = sysctlbyname( "hw.cpufrequency_max", &frequency, &freq_size, NULL, 0 );
if( err || 0 == frequency )
vlog( "Failed to get max cpu frequency. Reporting times as nanoseconds.\n" );
else
{
conversion *= 1e-9L /* sec / ns */ * frequency /* cycles / sec */;
vlog( "Reporting times as cycles. (%2.2f MHz)\n", 1e-6 * frequency );
}
}
else
vlog( "Reporting times as nanoseconds.\n" );
}
return (double) (delta * conversion);
}
double TicksToSeconds( uint64_t delta )
{
static long double conversion = 0.0L;
if( 0.0L == conversion )
{
// attempt to get conversion to nanoseconds
mach_timebase_info_data_t info;
int err = mach_timebase_info( &info );
if( err )
return __builtin_nanf("");
conversion = info.numer / (1e9L * info.denom);
}
return (double) (delta * conversion);
}
#pragma mark -
#pragma mark GuardCalloc
// Page granularity assumed by the guard allocator.
// NOTE(review): hard-coded; confirm it matches the target's real page size.
#define kPageSize 4096
// Bookkeeping record that GuardCalloc stores at the very end of the leading
// guard page, immediately before the first user object.
typedef struct BufInfo
{
void *head; // start of the whole mmap'd region (what munmap receives)
size_t count; // number of objects in the buffer
size_t stride; // per-object size rounded up to a multiple of kPageSize
size_t totalSize; // length in bytes of the whole mmap'd region
}BufInfo;
static int GuardMarkBuffer( void *buffer, int flag );
void *GuardCalloc( size_t count, size_t size, size_t *objectStride )
{
if( objectStride )
*objectStride = 0;
// Round size up to a multiple of a page size
size_t stride = (size + kPageSize - 1) & -kPageSize;
//Calculate total size of the allocation
size_t totalSize = count * (stride + kPageSize) + kPageSize;
// Allocate
char *buf = (char*)mmap( NULL,
totalSize,
PROT_READ | PROT_WRITE,
MAP_ANON | MAP_SHARED,
0, 0 );
if( MAP_FAILED == buf )
{
vlog( "mmap failed: %d\n", errno );
return NULL;
}
// Find the first byte of user data
char *result = buf + kPageSize;
// Record what we did for posterity
BufInfo *bptr = (BufInfo*) result - 1;
bptr->head = buf;
bptr->count = count;
bptr->stride = stride;
bptr->totalSize = totalSize;
// Place the first guard page. Masks our record above.
if( mprotect(buf, kPageSize, PROT_NONE) )
{
munmap( buf, totalSize);
vlog( "mprotect -1 failed: %d\n", errno );
return NULL;
}
// Place the rest of the guard pages
size_t i;
char *p = result;
for( i = 0; i < count; i++ )
{
p += stride;
if( mprotect(p, kPageSize, PROT_NONE) )
{
munmap( buf, totalSize);
vlog( "mprotect %lu failed: %d\n", i, errno );
return NULL;
}
p += kPageSize;
}
// record the stride from object to object
if( objectStride )
*objectStride = stride + kPageSize;
// return pointer to first object
return result;
}
void GuardFree( void *buf )
{
if( mprotect((char*)buf - kPageSize, kPageSize, PROT_READ) )
{
vlog( "Unable to read buf info. GuardFree failed! %p (%d)\n", buf, errno );
return;
}
BufInfo *bptr = (BufInfo*) buf - 1;
if( munmap( bptr->head, bptr->totalSize ) )
vlog( "Unable to unmap data. GuardFree failed! %p (%d)\n", buf, errno );
}
// Applies PROT_READ to a GuardCalloc buffer via GuardMarkBuffer.
// Returns 0 on success, otherwise the errno value from the failing call.
int GuardMarkReadOnly( void *buf )
{
    const int protection = PROT_READ;
    return GuardMarkBuffer(buf, protection);
}
// Applies PROT_READ | PROT_WRITE to a GuardCalloc buffer via GuardMarkBuffer.
// Returns 0 on success, otherwise the errno value from the failing call.
int GuardMarkReadWrite( void *buf)
{
    const int protection = PROT_READ | PROT_WRITE;
    return GuardMarkBuffer(buf, protection);
}
// Applies PROT_WRITE to a GuardCalloc buffer via GuardMarkBuffer.
// Returns 0 on success, otherwise the errno value from the failing call.
// NOTE(review): whether PROT_WRITE alone really blocks reads depends on the
// platform's memory protection support — confirm before relying on it.
int GuardMarkWriteOnly( void *buf)
{
    const int protection = PROT_WRITE;
    return GuardMarkBuffer(buf, protection);
}
// Changes the memory protection of every object in a GuardCalloc buffer.
//
// buf  - pointer previously returned by GuardCalloc
// flag - protection bits handed to mprotect (PROT_READ, PROT_WRITE, ...)
//
// The leading guard page is temporarily made readable to fetch the BufInfo
// record, each object's stride-sized region is re-protected with `flag`, and
// the leading guard page is then made inaccessible again.
// Returns 0 on success, or the errno value of the mprotect call that failed.
static int GuardMarkBuffer( void *buf, int flag )
{
    char *const guardPage = (char*)buf - kPageSize;

    if( mprotect(guardPage, kPageSize, PROT_READ) )
    {
        // Save errno first: the logging call below may overwrite it.
        int err = errno;
        vlog( "Unable to read buf info. GuardMarkBuffer %d failed! %p (%d)\n", flag, buf, err );
        return err;
    }

    BufInfo *bptr = (BufInfo*) buf - 1;
    size_t count = bptr->count;
    size_t stride = bptr->stride;

    // Walk the objects: each one is `stride` bytes long and is followed by a
    // guard page, so consecutive objects are stride + kPageSize bytes apart.
    char *segment = (char*) buf;
    size_t i;
    for( i = 0; i < count; i++ )
    {
        if( mprotect(segment, stride, flag) )
        {
            int err = errno;
            vlog( "Unable to protect segment %lu. GuardMarkBuffer %d failed! %p (%d)\n", (unsigned long) i, flag, buf, err );
            // Best effort: hide the bookkeeping page again before bailing out.
            mprotect(guardPage, kPageSize, PROT_NONE);
            return err;
        }
        segment += stride + kPageSize;
    }

    if( mprotect(guardPage, kPageSize, PROT_NONE) )
    {
        int err = errno;
        vlog( "Unable to protect leading guard page. GuardMarkBuffer %d failed! %p (%d)\n", flag, buf, err );
        return err;
    }

    return 0;
}
#endif
// Builds a 32-bit pseudo-random value from two successive rand() results: the
// first is shifted into the upper half and XORed with the second.
// The two rand() calls are made in separate statements so their order is fixed
// (first call -> high part); inside a single expression the order of the calls
// would be unspecified and the sequence could differ between compilers.
// NOTE(review): with a 15-bit RAND_MAX, bits 15 and 31 of the result are never
// set — confirm whether that matters for the callers' value ranges.
uint32_t random_number32(void)
{
    const uint32_t high = (uint32_t) rand() << 16;
    const uint32_t low = (uint32_t) rand();
    return high ^ low;
}
// Builds a 64-bit pseudo-random value from four successive rand() results,
// shifted left by 48, 32, 16 and 0 bits respectively and XORed together.
// Each rand() call is its own statement so the call order is fixed (first call
// -> highest shift); inside a single expression the order would be unspecified
// and the sequence could differ between compilers.
uint64_t random_number64(void)
{
    const uint64_t part48 = (uint64_t) rand() << 48;
    const uint64_t part32 = (uint64_t) rand() << 32;
    const uint64_t part16 = (uint64_t) rand() << 16;
    const uint64_t part0 = (uint64_t) rand();
    return part48 ^ part32 ^ part16 ^ part0;
}

View File

@@ -0,0 +1,72 @@
//
// Utils.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_Utils_h
#define BulletTest_Utils_h
#include "btIntDefines.h"
#include <stddef.h>
#include <stdio.h>
/* Random float helpers built on random_number32(). Each use of a RANDF macro
 * calls random_number32() once. The Windows variants spell the power-of-two
 * scale factors with powf() instead of hexadecimal float literals, so <math.h>
 * must be included wherever they are expanded.
 *
 *   LARGE_FLOAT17  2^17
 *   RANDF_16       random value scaled by 2^-16
 *   RANDF_01       random value scaled by 2^-32
 *   RANDF          random value scaled by 2^-8
 *   RANDF_m1p1     2 * (random value scaled by 2^-32) - 1
 */
#ifdef _WIN32
#define LARGE_FLOAT17 (1.f * powf(2,17))
#define RANDF_16 (random_number32() * powf(2,-16))
#define RANDF_01 ( random_number32() * powf(2,-32) )
#define RANDF ( random_number32() * powf(2,-8) )
/* The scale is parenthesized before subtracting 1 so that this matches the
 * non-Windows definition below. */
#define RANDF_m1p1 (2.0f*( random_number32() * powf(2,-32) )-1.0f)
#else
#define LARGE_FLOAT17 (0x1.0p17f)
#define RANDF_16 (random_number32() * 0x1.0p-16f)
#define RANDF_01 ( random_number32() * 0x1.0p-32f )
#define RANDF ( random_number32() * 0x1.0p-8f )
#define RANDF_m1p1 (2.0f*( random_number32() * 0x1.0p-32f )-1.0f)
#endif//_WIN32
#ifdef __cplusplus
extern "C" {
#endif
/*********************
* Timing *
*********************/
// When non-zero, times are reported in nanoseconds rather than cycles.
extern int gReportNanoseconds;
// Returns the current value of the platform tick counter.
uint64_t ReadTicks( void );
double TicksToCycles( uint64_t delta ); // Performance data should be reported in cycles most of the time.
// Converts a tick delta from ReadTicks() for reporting elapsed time.
double TicksToSeconds( uint64_t delta );
/*********************
* Guard Heap *
*********************/
// Return a buffer containing count objects of size size, with guard pages in between.
// The stride between one object and the next is given by objectStride.
// objectStride may be NULL. Objects so created are freed with GuardFree.
void *GuardCalloc( size_t count, size_t size, size_t *objectStride );
void GuardFree( void * );
// Mark the contents of a guard buffer read-only, write-only or read-write. Return 0 on success.
int GuardMarkReadOnly( void *);
int GuardMarkWriteOnly( void *);
int GuardMarkReadWrite( void *);
/*********************
* Printing *
*********************/
// Logging macro used by the tests; forwards its arguments to printf.
#define vlog( ... ) printf( __VA_ARGS__ )
// Pseudo-random integers assembled from successive rand() results.
uint32_t random_number32(void);
uint64_t random_number64(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,19 @@
#ifndef BT_INT_DEFINES_H
#define BT_INT_DEFINES_H
// Provides int32_t, int64_t, uint32_t and uint64_t.
// GCC-compatible compilers get them from <stdint.h>.
#ifdef __GNUC__
#include <stdint.h>
#elif defined(_MSC_VER)
// MSVC: map onto the compiler's sized integer keywords.
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
#else
// Other compilers: assumes int is 32 bits and long long is 64 bits.
typedef int int32_t;
typedef long long int int64_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;
#endif
#endif //BT_INT_DEFINES_H

View File

@@ -0,0 +1,326 @@
//
// main.c
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include <stdio.h>
#ifdef __APPLE__
#include <libgen.h>
#endif //__APPLE__
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include "main.h"
#include "Utils.h"
#include "TestList.h"
#include "LinearMath/btScalar.h"
#if defined (BT_USE_NEON) || defined (BT_USE_SSE_IN_API)
#ifdef _WIN32
#define strcasecmp _stricmp
#define basename(A) A
#endif
// Sentinel returned by ParseArgs to mean "exit now, but with status 0".
#define EXIT_NO_ERROR INT_MIN
//int gReportNanoseconds = 0; // in Utils.c
int gReportAverageTimes = 0; // if 0, report best times; toggled by -a
int gExitOnError = 0; // if non-zero, stop at the first failing test; toggled by -e
char *gFullPath = NULL; // heap copy of argv[0], made by ParseArgs and freed at the end of main
const char *gAppName = NULL; // basename of gFullPath, or a placeholder string
int gArgc; // argc as passed to main
const char **gArgv; // argv as passed to main
// Singly linked list node naming one test requested on the command line.
typedef struct TestNode
{
struct TestNode *next; // next requested test, or NULL at the end of the list
const char *name; // points at the command line argument; not owned by the node
}TestNode;
// Tests to run, in command line order. NULL means "run every test".
TestNode *gNodeList = NULL;
static int ParseArgs( int argc, const char *argv[] );
static void PrintUsage( void );
static int Init( void );
static void ListTests(void );
// Human-readable name of the architecture this binary was compiled for,
// selected from compiler-predefined macros. Printed once at startup.
// 64-bit ARM is checked before 32-bit ARM so that it is reported as "arm64"
// instead of falling through to the "unknown arch" error.
const char *gArch =
#ifdef __i386__
    "i386";
#elif defined __x86_64__
    "x86_64";
#elif defined __aarch64__ || defined __arm64__
    "arm64";
#elif defined __arm__
    "arm";
#elif defined _WIN64
    "win64";
#elif defined _WIN32
    "win32";
#else
    #error unknown arch
#endif
#include <stdio.h>
// Runs gTestList[index]: prints the banner, times the test, prints the
// outcome and bumps the matching counter.
// Returns the test's own return value (0 means the test passed).
static int RunTestAtIndex( size_t index, int *numPassedTests, int *numFailedTests )
{
    printf( "\n----------------------------------------------\n" );
    printf( "Testing %s:\n", gTestList[index].name );
    printf( "----------------------------------------------\n" );

    uint64_t startTime = ReadTicks();
    int local_error = gTestList[index].test_func();
    uint64_t currentTime = ReadTicks() - startTime;

    if( local_error )
    {
        (*numFailedTests)++;
        printf( "*** %s test failed with error: %d\n", gTestList[index].name, local_error );
    }
    else
    {
        (*numPassedTests)++;
        printf("%s Passed.\t\t\t(%2.2gs)\n", gTestList[index].name, TicksToSeconds(currentTime));
    }
    return local_error;
}

// Program entry point. Parses the command line, then runs either every test
// in gTestList or only the tests named on the command line, and prints a
// pass/fail summary.
// Returns 0 when everything passed, otherwise the first test error recorded
// (or -1 when a requested test name does not exist). With gExitOnError set,
// returns immediately on the first error without printing the summary.
int main (int argc, const char * argv[])
{
    // Enable just one test programatically (instead of command-line param)
    // TestNode *node = (TestNode*) malloc( sizeof( TestNode ) );
    // node->name = "btDbvt";
    // node->next = 0;
    // gNodeList = node;

    // Fixed seed so every run sees the same pseudo-random test data.
    srand(0);

    int numPassedTests=0;
    int numFailedTests= 0;

    int err;

    // Parse arguments. Build gNodeList.
    if( (err = ParseArgs( argc, argv ) ) )
    {
        if( EXIT_NO_ERROR == err )
            return 0;

        PrintUsage();
        return err;
    }

    printf("Arch: %s\n", gArch );

    if( gReportAverageTimes )
        printf( "Reporting average times.\n" );
    else
        printf( "Reporting best times.\n" );

    // Set a few things up
    if( (err = Init() ))
    {
        printf( "Init failed.\n" );
        return err;
    }

    if( NULL == gNodeList )
    { // test everything
        printf( "No function list found. Testing everything...\n" );
        size_t i;
        for( i = 0; NULL != gTestList[i].test_func; i++ )
        {
            int local_error = RunTestAtIndex( i, &numPassedTests, &numFailedTests );
            if( local_error )
            {
                if( gExitOnError )
                    return local_error;
                if( 0 == err )
                    err = local_error;
            }
        }
    }
    else
    { // test just the list
        while( NULL != gNodeList )
        {
            TestNode *currentNode = gNodeList;
            gNodeList = gNodeList->next;

            // Find the test with that name
            size_t i;
            for( i = 0; NULL != gTestList[i].test_func; i++ )
                if( 0 == strcasecmp( currentNode->name, gTestList[i].name ) )
                    break;

            if( NULL != gTestList[i].test_func )
            {
                int local_error = RunTestAtIndex( i, &numPassedTests, &numFailedTests );
                if( local_error )
                {
                    if( gExitOnError )
                    {
                        free( currentNode );
                        return local_error;
                    }
                    if( 0 == err )
                        err = local_error;
                }
            }
            else
            {
                printf( "\n***Error: Test name \"%s\" not found! Skipping.\n", currentNode->name );
                err = -1;
                if( gExitOnError )
                {
                    free( currentNode );
                    return -1;
                }
            }

            free( currentNode );
        }
    }

    printf( "\n----------------------------------------------\n" );
    printf("numPassedTests = %d, numFailedTests = %d\n",numPassedTests,numFailedTests);

    free(gFullPath);
    return err;
}
// One-time setup performed before any test runs. Always returns 0.
static int Init( void )
{
    // init the timer: the returned value is not needed, the call is made so
    // the tick conversion is set up before the first measurement.
    (void) TicksToCycles( 0 );

    return 0;
}
static int ParseArgs( int argc, const char *argv[] )
{
int listTests = 0;
TestNode *list = NULL;
gArgc = argc;
gArgv = argv;
gFullPath = (char*)malloc( strlen(argv[0]) + 1);
strcpy(gFullPath, argv[0]);
gAppName = basename( gFullPath );
if( NULL == gAppName )
gAppName = "<unknown app name>";
printf( "%s ", gAppName );
int skipremaining=0;
size_t i;
for( i = 1; i < argc; i++ )
{
const char *arg = argv[i];
printf( "\t%s", arg );
if( arg[0] == '-' )
{
arg++;
while( arg[0] != '\0' )
{
int stop = 0;
switch( arg[0] )
{
case 'a':
gReportAverageTimes ^= 1;
break;
case 'e':
gExitOnError ^= 1;
break;
case 'h':
PrintUsage();
return EXIT_NO_ERROR;
case 'l':
listTests ^= 1;
return EXIT_NO_ERROR;
case 's':
gReportNanoseconds ^= 1;
break;
case ' ':
stop = 1;
break;
case 'N'://ignore the -NSDocumentRevisionsDebugMode argument from XCode 4.3.2
skipremaining = 1;
stop = 1;
break;
default:
printf( "\nError: Unknown flag \'%c\'\n", arg[0] );
return -1;
}
if( stop )
break;
arg++;
}
}
else
{ // add function name to the list
TestNode *node = (TestNode*) malloc( sizeof( TestNode ) );
node->name = arg;
node->next = list;
list = node;
}
if (skipremaining)
break;
}
// reverse the list of test names, and stick on gNodeList
while( list )
{
TestNode *node = list;
TestNode *next = node->next;
node->next = gNodeList;
gNodeList = node;
list = next;
}
printf( "\n" );
if( listTests )
ListTests();
return 0;
}
// Prints the command line help text to stdout.
static void PrintUsage( void )
{
    printf("\nUsage:\n" );
    // The usage line ends with a newline so "Options:" starts on its own line.
    printf("%s: <-aehls> <test names>\n", gAppName);
    printf("Options:\n");
    printf("\t-a\tToggle report average times vs. best times. (Default: best times)\n");
    printf("\t-e\tToggle exit immediately on error behavior. (Default: off)\n");
    printf("\t-h\tPrint this message.\n");
    printf("\t-l\tToggle list available test names. (Default: off)\n");
    printf("\t-s\tToggle report times in cycles or nanoseconds. (Default: cycles)\n\n");
    printf("\tOptions may be followed by one or more test names. If no test names \n" );
    printf("\tare provided, then all tests are run.\n\n");
}
// Prints the names of all tests in gTestList, four per row, separated by
// commas. gTestList is terminated by an entry whose test_func is NULL.
static void ListTests(void )
{
    size_t i;

    printf("\nTests:\n");
    for( i = 0; NULL != gTestList[i].test_func; i++ )
    {
        printf( "%19s", gTestList[i].name );
        // Comma only between names: look ahead at the next entry, which is at
        // worst the NULL terminator, so the read stays inside the table.
        if( NULL != gTestList[i+1].test_func )
            printf( "," );
        if( 3 == (i&3) )
            printf( "\n" );
    }

    // Finish a partially filled last row.
    if( 0 != (i&3) )
        printf( "\n" );
}
#else
#include <stdio.h>
// Fallback entry point compiled when neither BT_USE_NEON nor
// BT_USE_SSE_IN_API is defined: prints an explanatory message and returns 0
// without running any test.
int main(int argc, char* argv[])
{
    static const char message[] =
        "error: no SIMD enabled through BT_USE_NEON or BT_USE_SSE_IN_API \n(enable in LinearMath/btScalar.h or through build system)\n";

    (void) argc;
    (void) argv;

    fputs(message, stdout);
    return 0;
}
#endif

View File

@@ -0,0 +1,25 @@
//
// main.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_main_h
#define BulletTest_main_h
#ifdef __cplusplus
extern "C" {
#endif
extern int gReportAverageTimes; // if 0, report best times
extern int gExitOnError; // if non-zero, exit as soon as an error is encountered
extern const char *gAppName; // the name of this application
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,70 @@
//
// vector.h
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#ifndef BulletTest_vector_h
#define BulletTest_vector_h
// 16-byte vector typedefs (GCC/Clang vector_size extension) plus the intrinsic
// headers matching each instruction set the compiler reports as enabled.
#ifdef __SSE__
typedef float float4 __attribute__ ((__vector_size__(16)));
#include <xmmintrin.h>
#endif
#ifdef __SSE2__
typedef double double2 __attribute__ ((__vector_size__(16)));
typedef char char16 __attribute__ ((__vector_size__(16)));
typedef unsigned char uchar16 __attribute__ ((__vector_size__(16)));
typedef short short8 __attribute__ ((__vector_size__(16)));
typedef unsigned short ushort8 __attribute__ ((__vector_size__(16)));
typedef int int4 __attribute__ ((__vector_size__(16)));
// typedef unsigned int uint4 __attribute__ ((__vector_size__(16)));
// long2/ulong2 always hold two 64-bit lanes: plain long where long is 64 bits
// (LP64), long long otherwise.
#ifdef __LP64__
typedef long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long ulong2 __attribute__ ((__vector_size__(16)));
#else
typedef long long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long long ulong2 __attribute__ ((__vector_size__(16)));
#endif
#include <emmintrin.h>
#endif
#ifdef __SSE3__
#include <pmmintrin.h>
#endif
#ifdef __SSSE3__
#include <tmmintrin.h>
#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
// ARM: the same typedef names are provided when the ARMv7 architecture macro
// from <arm/arch.h> is defined, together with the NEON intrinsics header.
#ifdef __arm__
#include <arm/arch.h>
#ifdef _ARM_ARCH_7
#define ARM_NEON_GCC_COMPATIBILITY 1
#include <arm_neon.h>
typedef float float4 __attribute__ ((__vector_size__(16)));
typedef double double2 __attribute__ ((__vector_size__(16)));
typedef char char16 __attribute__ ((__vector_size__(16)));
typedef unsigned char uchar16 __attribute__ ((__vector_size__(16)));
typedef short short8 __attribute__ ((__vector_size__(16)));
typedef unsigned short ushort8 __attribute__ ((__vector_size__(16)));
typedef int int4 __attribute__ ((__vector_size__(16)));
typedef unsigned int uint4 __attribute__ ((__vector_size__(16)));
#ifdef __LP64__
typedef long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long ulong2 __attribute__ ((__vector_size__(16)));
#else
typedef long long long2 __attribute__ ((__vector_size__(16)));
typedef unsigned long long ulong2 __attribute__ ((__vector_size__(16)));
#endif
#endif
#endif
#endif

23
test/Bullet2/premake4.lua Normal file
View File

@@ -0,0 +1,23 @@
-- premake4 project for the SIMD unit-test application.
project "AppUnitTest"
-- Built as a windowed app when premake is run with the "ios" option,
-- otherwise as a console app.
if _OPTIONS["ios"] then
kind "WindowedApp"
else
kind "ConsoleApp"
end
targetdir "bin"
-- NOTE(review): "../src" is relative to this script's directory; confirm it
-- resolves to the Bullet source root from where this file lives.
includedirs {"../src","Source", "Source/Tests"}
-- Bullet libraries the tests link against.
links {
"BulletDynamics","BulletCollision", "LinearMath"
}
language "C++"
-- Every .cpp and .h file below Source/ is part of the project.
files {
"Source/**.cpp",
"Source/**.h",
}