Files
bullet3/test/Bullet2/Source/Tests/Test_maxdot.cpp
erwincoumans ab8f16961e Code-style consistency improvement:
Apply clang-format-all.sh using the _clang-format file through all the cpp/.h files.
Make sure not to apply it to certain serialization structures, since some parsers expect the * to be part of the name instead of the type.
This commit contains no other changes aside from adding and applying clang-format-all.sh
2018-09-23 14:17:31 -07:00

277 lines
7.0 KiB
C++

//
// Test_maxdot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_maxdot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// Scalar reference implementation of the max-dot search. Test_maxdot() uses it
// both as the expected result for the correctness checks and as the baseline
// for the timing table. The definition follows Test_maxdot() in this file.
static long maxdot_ref(const btSimdFloat4 *vertices,
float *vec,
size_t count,
float *dotResult);
// log2 of the largest vertex count exercised by the test; one step smaller
// when __arm__ is defined.
#ifdef __arm__
#define MAX_LOG2_SIZE 9
#else
#define MAX_LOG2_SIZE 10
#endif
// Largest vertex count exercised; also the number of vertices allocated for
// the test buffer. Note the value has unsigned type.
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
// Number of back-to-back calls made inside each timed interval.
#define LOOPCOUNT 10
int Test_maxdot(void)
{
// Init an array flanked by guard pages
btSimdFloat4 *data = (btSimdFloat4 *)GuardCalloc(1, MAX_SIZE * sizeof(btSimdFloat4), NULL);
float *fp = (float *)data;
long correct, test;
btVector3 localScaling(0.1f, 0.2f, 0.3f);
size_t size;
// Init the data
size_t i;
for (i = 0; i < MAX_SIZE; i++)
{
fp[4 * i] = (int32_t)RANDF_16;
fp[4 * i + 1] = (int32_t)RANDF_16;
fp[4 * i + 2] = (int32_t)RANDF_16;
fp[4 * i + 3] = BT_NAN; // w channel NaN
}
float correctDot, testDot;
fp = (float *)localScaling;
float maxRelativeError = 0.f;
for (size = 1; size <= MAX_SIZE; size++)
{
float *in = (float *)(data + MAX_SIZE - size);
size_t position;
for (position = 0; position < size; position++)
{
float *biggest = in + position * 4;
float old[4] = {biggest[0], biggest[1], biggest[2], biggest[3]};
biggest[0] += LARGE_FLOAT17;
biggest[1] += LARGE_FLOAT17;
biggest[2] += LARGE_FLOAT17;
biggest[3] += LARGE_FLOAT17;
correctDot = BT_NAN;
testDot = BT_NAN;
correct = maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
test = localScaling.maxDot((btVector3 *)in, size, testDot);
if (test < 0 || test >= size)
{
vlog("Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
continue;
}
if (correct != test)
{
vlog("Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
return 1;
}
if (test != position)
{
vlog("Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2],
fp[0] * in[4 * position] + fp[1] * in[4 * position + 1] + fp[2] * in[4 * position + 2]);
return 1;
}
if (correctDot != testDot)
{
float relativeError = btFabs((testDot - correctDot) / correctDot);
if (relativeError > 1e-6)
{
vlog("Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
return 1;
}
else
{
if (maxRelativeError < relativeError)
{
maxRelativeError = relativeError;
#ifdef VERBOSE_WARNING
sprintf(errStr, "Warning @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
#endif //VERBOSE_WARNING
}
}
}
memcpy(biggest, old, 16);
}
}
if (maxRelativeError)
{
printf("Warning: relative error = %e\n", maxRelativeError);
#ifdef VERBOSE_WARNING
vlog(errStr);
#endif
}
uint64_t scalarTimes[33 + (MAX_LOG2_SIZE - 5)];
uint64_t vectorTimes[33 + (MAX_LOG2_SIZE - 5)];
size_t j, k;
float *in = (float *)data;
for (size = 1; size <= 32; size++)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTimes[size] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
correct += maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
scalarTimes[size] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTimes[size] = bestTime;
else
scalarTimes[size] /= 100;
}
uint64_t *timep = &scalarTimes[33];
for (size = 64; size <= MAX_SIZE; size *= 2)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
correct += maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
for (size = 1; size <= 32; size++)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTimes[size] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
test += localScaling.maxDot((btVector3 *)in, size, testDot);
currentTime = ReadTicks() - startTime;
vectorTimes[size] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTimes[size] = bestTime;
else
vectorTimes[size] /= 100;
}
timep = &vectorTimes[33];
for (size = 64; size <= MAX_SIZE; size *= 2)
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
timep[0] = 0;
for (j = 0; j < 100; j++)
{
startTime = ReadTicks();
for (k = 0; k < LOOPCOUNT; k++)
test += localScaling.maxDot((btVector3 *)in, size, testDot);
currentTime = ReadTicks() - startTime;
timep[0] += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
timep[0] = bestTime;
else
timep[0] /= 100;
timep++;
}
vlog("Timing:\n");
vlog(" size\t scalar\t vector\n");
for (size = 1; size <= 32; size++)
vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[size]) / LOOPCOUNT, TicksToCycles(vectorTimes[size]) / LOOPCOUNT);
size_t index = 33;
for (size = 64; size <= MAX_SIZE; size *= 2)
{
vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[index]) / LOOPCOUNT, TicksToCycles(vectorTimes[index]) / LOOPCOUNT);
index++;
}
// Useless check to make sure that the timing loops are not optimized away
if (test != correct)
vlog("Error: Test != correct: *%ld vs. %ld\n", correct, test);
GuardFree(data);
return 0;
}
// Scalar reference for the max-dot search.
//
// `vertices` is read as a packed float array with 4 floats per vertex; only
// the first three floats of each vertex take part in the dot product with
// `vec`. The largest dot product found is stored in `*dotResult` and the index
// of the vertex that produced it is returned.
//
// The comparison is a strict `>`, so the first of several equal maxima wins
// and a NaN dot product is never selected. If no vertex beats -BT_INFINITY
// (including when `count` is 0) the function returns -1 and stores
// -BT_INFINITY.
static long maxdot_ref(const btSimdFloat4 *vertices,
					   float *vec,
					   size_t count,
					   float *dotResult)
{
	const float *base = (const float *)vertices;
	float bestDot = -BT_INFINITY;
	long bestIndex = -1;

	for (long n = 0; n < count; n++)
	{
		const float *v = base + 4 * n;
		const float candidate = vec[0] * v[0] + vec[1] * v[1] + vec[2] * v[2];
		if (candidate > bestDot)
		{
			bestDot = candidate;
			bestIndex = n;
		}
	}

	*dotResult = bestDot;
	return bestIndex;
}
#endif //BT_USE_SSE