This commit is contained in:
erwincoumans
2013-06-20 12:55:35 -07:00
66 changed files with 1026 additions and 914 deletions

View File

@@ -651,7 +651,7 @@ void b3DynamicBvh::extractLeaves(const b3DbvtNode* node,b3AlignedObjectArray<c
#include <stdio.h>
#include <stdlib.h>
#include "LinearMath/b3QuickProf.h"
/*
q6600,2.4ghz

View File

@@ -36,7 +36,7 @@ subject to the following restrictions:
#if B3_DBVT_BP_PROFILE
#define B3_DBVT_BP_PROFILING_RATE 256
#include "LinearMath/b3Quickprof.h"
#endif

View File

@@ -82,6 +82,37 @@ void b3OutputErrorMessageVarArgsInternal(const char *str, ...)
va_end(argList);
}
///Built-in enter-zone hook: does nothing. It is the initial value of the enter hook.
void b3EnterProfileZoneDefault(const char* /*name*/)
{
    // intentionally empty
}
///Built-in leave-zone hook: does nothing. It is the initial value of the leave hook.
void b3LeaveProfileZoneDefault()
{
    // intentionally empty
}
///Currently installed profile-zone hooks; both start out pointing at the no-op defaults.
static b3EnterProfileZoneFunc* b3s_enterFunc = &b3EnterProfileZoneDefault;
static b3LeaveProfileZoneFunc* b3s_leaveFunc = &b3LeaveProfileZoneDefault;
void b3EnterProfileZone(const char* name)
{
(b3s_enterFunc)(name);
}
void b3LeaveProfileZone()
{
(b3s_leaveFunc)();
}
///Installs a custom enter-zone hook.
///Passing a null pointer restores the built-in no-op hook, so b3EnterProfileZone
///never calls through a null function pointer.
void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc)
{
    b3s_enterFunc = enterFunc ? enterFunc : b3EnterProfileZoneDefault;
}
///Installs a custom leave-zone hook.
///Passing a null pointer restores the built-in no-op hook, so b3LeaveProfileZone
///never calls through a null function pointer.
void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc)
{
    b3s_leaveFunc = leaveFunc ? leaveFunc : b3LeaveProfileZoneDefault;
}
#ifndef _WIN32
#undef vsprintf_s
#endif

View File

@@ -2,21 +2,6 @@
#ifndef B3_LOGGING_H
#define B3_LOGGING_H
typedef void (b3PrintfFunc)(const char* msg);
typedef void (b3WarningMessageFunc)(const char* msg);
typedef void (b3ErrorMessageFunc)(const char* msg);
///The developer can route b3Printf output using their own implementation
void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc);
void b3SetCustomWarningMessageFunc(b3WarningMessageFunc* warningMsgFunc);
void b3SetCustomErrorMessageFunc(b3ErrorMessageFunc* errorMsgFunc);
///Don't use those internal functions directly, use the b3Printf or b3SetCustomPrintfFunc instead (or warning/error version)
void b3OutputPrintfVarArgsInternal(const char *str, ...);
void b3OutputWarningMessageVarArgsInternal(const char *str, ...);
void b3OutputErrorMessageVarArgsInternal(const char *str, ...);
///We wrap the macro body in do/while(0) so that a statement such as "if (condition) b3Printf("test"); else {...}" still compiles; without the wrapper the else branch would fail to compile
///You can also customize the message by uncommenting a different line below
#define b3Printf(...) b3OutputPrintfVarArgsInternal(__VA_ARGS__)
@@ -28,4 +13,57 @@ void b3OutputErrorMessageVarArgsInternal(const char *str, ...);
#define b3Warning(...) do {b3OutputWarningMessageVarArgsInternal("b3Warning[%s,%d]:\n",__FILE__,__LINE__);b3OutputWarningMessageVarArgsInternal(__VA_ARGS__); }while(0)
#define b3Error(...) do {b3OutputErrorMessageVarArgsInternal("b3Error[%s,%d]:\n",__FILE__,__LINE__);b3OutputErrorMessageVarArgsInternal(__VA_ARGS__); } while(0)
#ifndef B3_NO_PROFILE
void b3EnterProfileZone(const char* name);
void b3LeaveProfileZone();
///Scoped profiling guard: enters the named profile zone on construction and
///leaves it on destruction.
///Copying is disabled because a copy would call b3LeaveProfileZone a second time
///for a single b3EnterProfileZone call.
class b3ProfileZone
{
public:
    ///@param name zone name forwarded unchanged to b3EnterProfileZone
    explicit b3ProfileZone(const char* name)
    {
        b3EnterProfileZone( name );
    }
    ~b3ProfileZone()
    {
        b3LeaveProfileZone();
    }
private:
    // Declared but intentionally not defined: forbids copying without requiring C++11.
    b3ProfileZone(const b3ProfileZone&);
    b3ProfileZone& operator=(const b3ProfileZone&);
};
#define B3_PROFILE( name ) b3ProfileZone __profile( name )
#else //B3_NO_PROFILE
#define B3_PROFILE( name )
#define b3StartProfile(a)
#define b3StopProfile
#endif //#ifndef B3_NO_PROFILE
///Signatures of the user-replaceable output callbacks; each receives one message string.
typedef void (b3PrintfFunc)(const char* msg);
typedef void (b3WarningMessageFunc)(const char* msg);
typedef void (b3ErrorMessageFunc)(const char* msg);
///Signatures of the user-replaceable profile-zone callbacks.
///The enter callback receives the string passed to b3EnterProfileZone; the leave callback takes no arguments.
typedef void (b3EnterProfileZoneFunc)(const char* msg);
typedef void (b3LeaveProfileZoneFunc)();
///The developer can route b3Printf output using their own implementation
void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc);
void b3SetCustomWarningMessageFunc(b3WarningMessageFunc* warningMsgFunc);
void b3SetCustomErrorMessageFunc(b3ErrorMessageFunc* errorMsgFunc);
///Set custom profile zone functions (zones can be nested)
void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc);
void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc);
///Don't use those internal functions directly, use the b3Printf or b3SetCustomPrintfFunc instead (or warning/error version)
void b3OutputPrintfVarArgsInternal(const char *str, ...);
void b3OutputWarningMessageVarArgsInternal(const char *str, ...);
void b3OutputErrorMessageVarArgsInternal(const char *str, ...);
#endif//B3_LOGGING_H

View File

@@ -1,643 +0,0 @@
/*
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
/*
***************************************************************************************************
**
** profile.cpp
**
** Real-Time Hierarchical Profiling for Game Programming Gems 3
**
** by Greg Hjelstrom & Byon Garrabrant
**
***************************************************************************************************/
// Credits: The Clock class was inspired by the Timer classes in
// Ogre (www.ogre3d.org).
#include "b3Quickprof.h"
#include "b3MinMax.h"
#ifndef B3_NO_PROFILE
static b3Clock b3s_profileClock;
#ifdef __CELLOS_LV2__
#include <sys/sys_time.h>
#include <sys/time_util.h>
#include <stdio.h>
#endif
#if defined (SUNOS) || defined (__SUNOS__)
#include <stdio.h>
#endif
#if defined(WIN32) || defined(_WIN32)
#define B3_USE_WINDOWS_TIMERS
#define WIN32_LEAN_AND_MEAN
#define NOWINRES
#define NOMCX
#define NOIME
#ifdef _XBOX
#include <Xtl.h>
#else //_XBOX
#include <windows.h>
#endif //_XBOX
#include <time.h>
#else //_WIN32
#include <sys/time.h>
#endif //_WIN32
///Platform-specific storage for the reference time of a b3Clock.
struct b3ClockData
{
#ifdef B3_USE_WINDOWS_TIMERS
// Performance-counter frequency, filled in by the b3Clock constructor.
LARGE_INTEGER mClockFrequency;
// GetTickCount() value captured by reset(); used to cross-check the performance counter.
DWORD mStartTick;
// Elapsed counter value stored by the previous time query.
LONGLONG mPrevElapsedTime;
// Performance-counter value captured by reset().
LARGE_INTEGER mStartTime;
#else
#ifdef __CELLOS_LV2__
// Timebase value captured by reset().
uint64_t mStartTime;
#else
// gettimeofday() result captured by reset().
struct timeval mStartTime;
#endif //__CELLOS_LV2__
#endif //B3_USE_WINDOWS_TIMERS
};
///The b3Clock is a portable basic clock that reports elapsed time in milliseconds or microseconds; it is used for profiling.
b3Clock::b3Clock()
    : m_data(new b3ClockData)
{
    // On Windows the counter frequency is queried once here and reused by every time query.
#ifdef B3_USE_WINDOWS_TIMERS
    QueryPerformanceFrequency(&m_data->mClockFrequency);
#endif
    // Start measuring from the moment of construction.
    reset();
}
/// Frees the platform data allocated by the constructors.
b3Clock::~b3Clock()
{
    delete this->m_data;
}
/// Copy constructor: allocates separate platform data and copies the other clock's state into it.
b3Clock::b3Clock(const b3Clock& other)
    : m_data(new b3ClockData)
{
    const b3ClockData& source = *other.m_data;
    *m_data = source;
}
/// Copies the other clock's state into this clock's existing platform data.
b3Clock& b3Clock::operator=(const b3Clock& other)
{
    b3ClockData& destination = *m_data;
    destination = *other.m_data;
    return *this;
}
/// Resets the initial reference time.
/// The value captured here is the start point that getTimeMilliseconds() and
/// getTimeMicroseconds() measure from.
void b3Clock::reset()
{
#ifdef B3_USE_WINDOWS_TIMERS
// Windows: capture both the performance counter and the tick count; the time
// queries compare the two to detect leaps in the performance counter.
QueryPerformanceCounter(&m_data->mStartTime);
m_data->mStartTick = GetTickCount();
m_data->mPrevElapsedTime = 0;
#else
#ifdef __CELLOS_LV2__
// Cell: read the timebase register through the SDK macro.
typedef uint64_t ClockSize;
ClockSize newTime;
//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
SYS_TIMEBASE_GET( newTime );
m_data->mStartTime = newTime;
#else
// Everything else: use gettimeofday.
gettimeofday(&m_data->mStartTime, 0);
#endif
#endif
}
/// Returns the time in ms since the last call to reset or since
/// the b3Clock was created.
/// On Windows this call also updates the stored start time and previous elapsed
/// time when a performance-counter leap is detected.
unsigned long int b3Clock::getTimeMilliseconds()
{
#ifdef B3_USE_WINDOWS_TIMERS
LARGE_INTEGER currentTime;
QueryPerformanceCounter(&currentTime);
// Elapsed time in performance-counter units.
LONGLONG elapsedTime = currentTime.QuadPart -
m_data->mStartTime.QuadPart;
// Compute the number of millisecond ticks elapsed.
unsigned long msecTicks = (unsigned long)(1000 * elapsedTime /
m_data->mClockFrequency.QuadPart);
// Check for unexpected leaps in the Win32 performance counter.
// (This is caused by unexpected data across the PCI to ISA
// bridge, aka south bridge. See Microsoft KB274323.)
unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
// Difference between the two clocks; more than 100 ms either way is treated as a leap.
signed long msecOff = (signed long)(msecTicks - elapsedTicks);
if (msecOff < -100 || msecOff > 100)
{
// Adjust the starting time forwards.
// The adjustment is capped at the counter time elapsed since the previous query.
LONGLONG msecAdjustment = b3Min(msecOff *
m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
m_data->mPrevElapsedTime);
m_data->mStartTime.QuadPart += msecAdjustment;
elapsedTime -= msecAdjustment;
// Recompute the number of millisecond ticks elapsed.
msecTicks = (unsigned long)(1000 * elapsedTime /
m_data->mClockFrequency.QuadPart);
}
// Store the current elapsed time for adjustments next time.
m_data->mPrevElapsedTime = elapsedTime;
return msecTicks;
#else
#ifdef __CELLOS_LV2__
// Timebase ticks per millisecond.
uint64_t freq=sys_time_get_timebase_frequency();
double dFreq=((double) freq) / 1000.0;
typedef uint64_t ClockSize;
ClockSize newTime;
SYS_TIMEBASE_GET( newTime );
//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
#else
// Seconds and microseconds are each converted to milliseconds, then summed.
struct timeval currentTime;
gettimeofday(&currentTime, 0);
return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 +
(currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000;
#endif //__CELLOS_LV2__
#endif
}
/// Returns the time in us since the last call to reset or since
/// the Clock was created.
/// On Windows this call also updates the stored start time and previous elapsed
/// time when a performance-counter leap is detected.
/// NOTE(review): the result is narrowed to unsigned long; confirm that wrap-around
/// on long-running clocks is acceptable for callers.
unsigned long int b3Clock::getTimeMicroseconds()
{
#ifdef B3_USE_WINDOWS_TIMERS
LARGE_INTEGER currentTime;
QueryPerformanceCounter(&currentTime);
// Elapsed time in performance-counter units.
LONGLONG elapsedTime = currentTime.QuadPart -
m_data->mStartTime.QuadPart;
// Compute the number of millisecond ticks elapsed.
// (Milliseconds are only needed for the leap check against GetTickCount below.)
unsigned long msecTicks = (unsigned long)(1000 * elapsedTime /
m_data->mClockFrequency.QuadPart);
// Check for unexpected leaps in the Win32 performance counter.
// (This is caused by unexpected data across the PCI to ISA
// bridge, aka south bridge. See Microsoft KB274323.)
unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
// Difference between the two clocks; more than 100 ms either way is treated as a leap.
signed long msecOff = (signed long)(msecTicks - elapsedTicks);
if (msecOff < -100 || msecOff > 100)
{
// Adjust the starting time forwards.
// The adjustment is capped at the counter time elapsed since the previous query.
LONGLONG msecAdjustment = b3Min(msecOff *
m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
m_data->mPrevElapsedTime);
m_data->mStartTime.QuadPart += msecAdjustment;
elapsedTime -= msecAdjustment;
}
// Store the current elapsed time for adjustments next time.
m_data->mPrevElapsedTime = elapsedTime;
// Convert to microseconds.
unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime /
m_data->mClockFrequency.QuadPart);
return usecTicks;
#else
#ifdef __CELLOS_LV2__
// Timebase ticks per microsecond.
uint64_t freq=sys_time_get_timebase_frequency();
double dFreq=((double) freq)/ 1000000.0;
typedef uint64_t ClockSize;
ClockSize newTime;
//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
SYS_TIMEBASE_GET( newTime );
return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
#else
// Seconds are converted to microseconds and added to the microsecond difference.
struct timeval currentTime;
gettimeofday(&currentTime, 0);
return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 +
(currentTime.tv_usec - m_data->mStartTime.tv_usec);
#endif//__CELLOS_LV2__
#endif
}
/// Writes the profile clock's current reading, in microseconds, to *ticks.
inline void b3Profile_Get_Ticks(unsigned long int * ticks)
{
    const unsigned long int now = b3s_profileClock.getTimeMicroseconds();
    *ticks = now;
}
/// Divisor applied to tick differences before they are accumulated or reported.
/// Ticks are microseconds, so dividing by 1000 yields milliseconds.
inline float b3Profile_Get_Tick_Rate(void)
{
    // The alternative divisor 1000000.f is left disabled.
    const float ticksPerReportedUnit = 1000.f;
    return ticksPerReportedUnit;
}
/***************************************************************************************************
**
** b3ProfileNode
**
***************************************************************************************************/
/***********************************************************************************************
* INPUT: *
* name - pointer to a static string which is the name of this profile node *
* parent - parent pointer *
* *
* WARNINGS: *
* The name is assumed to be a static pointer, only the pointer is stored and compared for *
* efficiency reasons. *
*=============================================================================================*/
b3ProfileNode::b3ProfileNode( const char * name, b3ProfileNode * parent )
    : Name( name )
    , TotalCalls( 0 )
    , TotalTime( 0 )
    , StartTime( 0 )
    , RecursionCounter( 0 )
    , Parent( parent )
    , Child( 0 )
    , Sibling( 0 )
    , m_userPtr( 0 )
{
    // A new node has no children or siblings yet, so this only clears its own counters.
    Reset();
}
/// Deletes the child and sibling nodes reachable from this node and clears both links.
void b3ProfileNode::CleanupMemory()
{
    // Each node's destructor deletes its own child and sibling in turn.
    delete Child;
    delete Sibling;
    Child = NULL;
    Sibling = NULL;
}
/// Destroys this node together with its child and its sibling.
b3ProfileNode::~b3ProfileNode( void )
{
    // Deleting a null pointer is a no-op, so leaf nodes need no special case.
    delete Child;
    delete Sibling;
}
/***********************************************************************************************
* INPUT: *
* name - static string pointer to the name of the node we are searching for *
* *
* WARNINGS: *
* All profile names are assumed to be static strings so this function uses pointer compares *
* to find the named node. *
*=============================================================================================*/
b3ProfileNode * b3ProfileNode::Get_Sub_Node( const char * name )
{
    // Look through the existing children; names are matched by pointer, not by contents.
    for ( b3ProfileNode * candidate = Child; candidate != NULL; candidate = candidate->Sibling ) {
        if ( candidate->Name == name ) {
            return candidate;
        }
    }

    // No match: create the node and push it onto the front of the child list.
    b3ProfileNode * const created = new b3ProfileNode( name, this );
    created->Sibling = Child;
    Child = created;
    return created;
}
void b3ProfileNode::Reset( void )
{
TotalCalls = 0;
TotalTime = 0.0f;
if ( Child ) {
Child->Reset();
}
if ( Sibling ) {
Sibling->Reset();
}
}
/// Records one more call; the start time is sampled only on the outermost (non-recursive) entry.
void b3ProfileNode::Call( void )
{
    ++TotalCalls;

    const bool outermostEntry = ( RecursionCounter == 0 );
    ++RecursionCounter;
    if ( outermostEntry ) {
        b3Profile_Get_Ticks(&StartTime);
    }
}
/// Leaves one level of recursion and, when the outermost level is left, adds the elapsed time.
/// Returns true when the recursion counter has reached zero.
bool b3ProfileNode::Return( void )
{
    --RecursionCounter;
    const bool fullyUnwound = ( RecursionCounter == 0 );

    if ( fullyUnwound && TotalCalls != 0 ) {
        unsigned long int elapsed;
        b3Profile_Get_Ticks(&elapsed);
        elapsed -= StartTime;
        TotalTime += (float)elapsed / b3Profile_Get_Tick_Rate();
    }
    return fullyUnwound;
}
/***************************************************************************************************
**
** b3ProfileIterator
**
***************************************************************************************************/
/// Positions the iterator on the first child of the given start node.
b3ProfileIterator::b3ProfileIterator( b3ProfileNode * start )
    : CurrentParent( start )
    , CurrentChild( start->Get_Child() )
{
}
void b3ProfileIterator::First(void)
{
CurrentChild = CurrentParent->Get_Child();
}
void b3ProfileIterator::Next(void)
{
CurrentChild = CurrentChild->Get_Sibling();
}
/// True once the iterator has moved past the last child of the current parent.
bool b3ProfileIterator::Is_Done(void)
{
    const bool exhausted = ( CurrentChild == NULL );
    return exhausted;
}
void b3ProfileIterator::Enter_Child( int index )
{
CurrentChild = CurrentParent->Get_Child();
while ( (CurrentChild != NULL) && (index != 0) ) {
index--;
CurrentChild = CurrentChild->Get_Sibling();
}
if ( CurrentChild != NULL ) {
CurrentParent = CurrentChild;
CurrentChild = CurrentParent->Get_Child();
}
}
void b3ProfileIterator::Enter_Parent( void )
{
if ( CurrentParent->Get_Parent() != NULL ) {
CurrentParent = CurrentParent->Get_Parent();
}
CurrentChild = CurrentParent->Get_Child();
}
/***************************************************************************************************
**
** b3ProfileManager
**
***************************************************************************************************/
// Root of the profile tree; it is created without a parent.
b3ProfileNode b3ProfileManager::Root( "Root", NULL );
// Node currently being profiled; starts at the root.
b3ProfileNode * b3ProfileManager::CurrentNode = &b3ProfileManager::Root;
// Frames counted since the last Reset().
int b3ProfileManager::FrameCounter = 0;
// Profile-clock tick value sampled by the last Reset().
unsigned long int b3ProfileManager::ResetTime = 0;
/***********************************************************************************************
* b3ProfileManager::Start_Profile -- Begin a named profile *
* *
* Steps one level deeper into the tree, if a child already exists with the specified name *
* then it accumulates the profiling; otherwise a new child node is added to the profile tree. *
* *
* INPUT: *
* name - name of this profiling record *
* *
* WARNINGS: *
* The string used is assumed to be a static string; pointer compares are used throughout *
* the profiling code for efficiency. *
*=============================================================================================*/
void b3ProfileManager::Start_Profile( const char * name )
{
    // A pointer-equal name means we are re-entering the node we are already in (recursion),
    // so stay on it; otherwise descend into the matching child, creating it if needed.
    const bool sameNode = ( name == CurrentNode->Get_Name() );
    if ( !sameNode ) {
        CurrentNode = CurrentNode->Get_Sub_Node( name );
    }

    CurrentNode->Call();
}
/***********************************************************************************************
* b3ProfileManager::Stop_Profile -- Stop timing and record the results. *
*=============================================================================================*/
void b3ProfileManager::Stop_Profile( void )
{
    // Return() reports whether the node has fully unwound; a recursive function
    // stays on the same node until its outermost call finishes.
    const bool nodeFinished = CurrentNode->Return();
    if ( nodeFinished ) {
        CurrentNode = CurrentNode->Get_Parent();
    }
}
/***********************************************************************************************
* b3ProfileManager::Reset -- Reset the contents of the profiling system *
* *
* This resets everything except for the tree structure. All of the timing data is reset. *
*=============================================================================================*/
void b3ProfileManager::Reset( void )
{
    FrameCounter = 0;

    // Restart the shared clock first so every tick sampled below is relative to it.
    b3s_profileClock.reset();

    // Clear all timing data (the tree structure is kept) and register a call on the root.
    Root.Reset();
    Root.Call();

    b3Profile_Get_Ticks(&ResetTime);
}
/***********************************************************************************************
* b3ProfileManager::Increment_Frame_Counter -- Increment the frame counter *
*=============================================================================================*/
void b3ProfileManager::Increment_Frame_Counter( void )
{
    // One more frame since the last Reset().
    ++FrameCounter;
}
/***********************************************************************************************
* b3ProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset *
*=============================================================================================*/
float b3ProfileManager::Get_Time_Since_Reset( void )
{
    unsigned long int now;
    b3Profile_Get_Ticks(&now);

    // Tick difference since Reset(), scaled by the profile tick rate.
    const unsigned long int elapsedTicks = now - ResetTime;
    return (float)elapsedTicks / b3Profile_Get_Tick_Rate();
}
#include <stdio.h>
/// Prints the children of the iterator's current parent through b3Printf, then
/// recurses into each child with the indentation increased by three dots.
/// @param profileIterator iterator positioned on the parent to dump; on return it is back on that parent
/// @param spacing number of '.' characters printed in front of every line
void b3ProfileManager::dumpRecursive(b3ProfileIterator* profileIterator, int spacing)
{
    profileIterator->First();
    if (profileIterator->Is_Done())
        return;

    // The root has no accumulated time of its own, so the time since the last reset is used instead.
    const float parent_time = profileIterator->Is_Root() ? b3ProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time();
    // NOTE(review): the frame counter starts at 0, so dumping before the first
    // Increment_Frame_Counter() divides by zero in the per-frame figure below.
    const int frames_since_reset = b3ProfileManager::Get_Frame_Count_Since_Reset();
    float accumulated_time = 0;
    int i;

    for (i=0;i<spacing;i++) b3Printf(".");
    b3Printf("----------------------------------\n");
    for (i=0;i<spacing;i++) b3Printf(".");
    b3Printf("Profiling: %s (total running time: %.3f ms) ---\n", profileIterator->Get_Current_Parent_Name(), parent_time );

    int numChildren = 0;
    for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next())
    {
        numChildren++;
        const float current_total_time = profileIterator->Get_Current_Total_Time();
        accumulated_time += current_total_time;
        const float fraction = parent_time > B3_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
        // Separate counter so the child index i printed below is not shadowed.
        for (int dot=0;dot<spacing;dot++) b3Printf(".");
        b3Printf("%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)\n",i, profileIterator->Get_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls());
    }

    // Children cannot legitimately take longer than their parent.
    if (parent_time < accumulated_time)
    {
        b3Printf("what's wrong\n");
    }
    for (i=0;i<spacing;i++) b3Printf(".");
    b3Printf("%s (%.3f %%) :: %.3f ms\n", "Unaccounted:",parent_time > B3_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);

    // Recurse into every child, restoring the iterator to this parent afterwards.
    for (i=0;i<numChildren;i++)
    {
        profileIterator->Enter_Child(i);
        dumpRecursive(profileIterator,spacing+3);
        profileIterator->Enter_Parent();
    }
}
void b3ProfileManager::dumpAll()
{
b3ProfileIterator* profileIterator = 0;
profileIterator = b3ProfileManager::Get_Iterator();
dumpRecursive(profileIterator,0);
b3ProfileManager::Release_Iterator(profileIterator);
}
/// Writes the children of the iterator's current parent to the given stream, then
/// recurses into each child with the indentation increased by three dots.
/// @param f output stream passed to fprintf
/// @param profileIterator iterator positioned on the parent to dump; on return it is back on that parent
/// @param spacing number of '.' characters printed in front of every line
void b3ProfileManager::dumpRecursive(FILE* f, b3ProfileIterator* profileIterator, int spacing)
{
    profileIterator->First();
    if (profileIterator->Is_Done())
        return;

    // The root has no accumulated time of its own, so the time since the last reset is used instead.
    const float parent_time = profileIterator->Is_Root() ? b3ProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time();
    // NOTE(review): the frame counter starts at 0, so dumping before the first
    // Increment_Frame_Counter() divides by zero in the per-frame figure below.
    const int frames_since_reset = b3ProfileManager::Get_Frame_Count_Since_Reset();
    float accumulated_time = 0;
    int i;

    for (i=0;i<spacing;i++) fprintf(f,".");
    fprintf(f,"----------------------------------\n");
    for (i=0;i<spacing;i++) fprintf(f,".");
    fprintf(f,"Profiling: %s (total running time: %.3f ms) ---\n", profileIterator->Get_Current_Parent_Name(), parent_time );

    int numChildren = 0;
    for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next())
    {
        numChildren++;
        const float current_total_time = profileIterator->Get_Current_Total_Time();
        accumulated_time += current_total_time;
        const float fraction = parent_time > B3_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
        // Separate counter so the child index i printed below is not shadowed.
        for (int dot=0;dot<spacing;dot++) fprintf(f,".");
        fprintf(f,"%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)\n",i, profileIterator->Get_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls());
    }

    // Children cannot legitimately take longer than their parent.
    if (parent_time < accumulated_time)
    {
        fprintf(f,"what's wrong\n");
    }
    for (i=0;i<spacing;i++)
        fprintf(f,".");
    fprintf(f,"%s (%.3f %%) :: %.3f ms\n", "Unaccounted:",parent_time > B3_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);

    // Recurse into every child, restoring the iterator to this parent afterwards.
    for (i=0;i<numChildren;i++)
    {
        profileIterator->Enter_Child(i);
        dumpRecursive(f,profileIterator,spacing+3);
        profileIterator->Enter_Parent();
    }
}
void b3ProfileManager::dumpAll(FILE* f)
{
b3ProfileIterator* profileIterator = 0;
profileIterator = b3ProfileManager::Get_Iterator();
dumpRecursive(f, profileIterator,0);
b3ProfileManager::Release_Iterator(profileIterator);
}
#endif //B3_NO_PROFILE

View File

@@ -1,218 +0,0 @@
/*
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
/***************************************************************************************************
**
** Real-Time Hierarchical Profiling for Game Programming Gems 3
**
** by Greg Hjelstrom & Byon Garrabrant
**
***************************************************************************************************/
// Credits: The Clock class was inspired by the Timer classes in
// Ogre (www.ogre3d.org).
#ifndef B3_QUICK_PROF_H
#define B3_QUICK_PROF_H
//To disable built-in profiling, please comment out next line
//#define B3_NO_PROFILE 1
#ifndef B3_NO_PROFILE
#include <stdio.h>//@todo remove this, backwards compatibility
#include "b3Scalar.h"
#include "b3AlignedAllocator.h"
#include <new>
#define B3_USE_CLOCK 1
#ifdef B3_USE_CLOCK
///The b3Clock is a portable basic clock that reports elapsed time in milliseconds or microseconds; it is used for profiling.
class b3Clock
{
public:
/// Creates the clock; the implementation calls reset(), so timing starts here.
b3Clock();
/// Copies the other clock's timing state into separately allocated data.
b3Clock(const b3Clock& other);
/// Copies the other clock's timing state into this clock's existing data.
b3Clock& operator=(const b3Clock& other);
~b3Clock();
/// Resets the initial reference time.
void reset();
/// Returns the time in ms since the last call to reset or since
/// the b3Clock was created.
unsigned long int getTimeMilliseconds();
/// Returns the time in us since the last call to reset or since
/// the Clock was created.
unsigned long int getTimeMicroseconds();
private:
// Platform-specific state; the struct is only forward-declared here.
struct b3ClockData* m_data;
};
#endif //B3_USE_CLOCK
///A node in the Profile Hierarchy Tree
///Each node has a parent, a first child, and a sibling link that chains the children of one parent.
///NOTE(review): the destructor deletes Child and Sibling but no copy constructor or
///assignment operator is declared, so copying a node would lead to a double delete.
class b3ProfileNode {
public:
// name is stored as a pointer only; parent may be NULL for the root.
b3ProfileNode( const char * name, b3ProfileNode * parent );
~b3ProfileNode( void );
// Returns the child whose name pointer equals name, creating it when absent.
b3ProfileNode * Get_Sub_Node( const char * name );
b3ProfileNode * Get_Parent( void ) { return Parent; }
b3ProfileNode * Get_Sibling( void ) { return Sibling; }
b3ProfileNode * Get_Child( void ) { return Child; }
// Deletes the child and sibling subtrees and clears both links.
void CleanupMemory();
// Clears call counts and times of this node and of the nodes reachable via Child/Sibling.
void Reset( void );
// Marks entry into this node; samples the start time on the outermost entry.
void Call( void );
// Marks exit from this node; returns true when the recursion counter reaches zero.
bool Return( void );
const char * Get_Name( void ) { return Name; }
int Get_Total_Calls( void ) { return TotalCalls; }
float Get_Total_Time( void ) { return TotalTime; }
void* GetUserPointer() const {return m_userPtr;}
void SetUserPointer(void* ptr) { m_userPtr = ptr;}
protected:
// Name pointer; lookups compare this pointer, not the characters.
const char * Name;
// Number of Call() invocations since the last Reset().
int TotalCalls;
// Accumulated time: tick differences divided by the profile tick rate.
float TotalTime;
// Tick value sampled at the outermost Call().
unsigned long int StartTime;
// Depth of nested Call() invocations that have not yet been matched by Return().
int RecursionCounter;
b3ProfileNode * Parent;
// First child; new children are inserted at the front of the list.
b3ProfileNode * Child;
// Next node under the same parent.
b3ProfileNode * Sibling;
// Opaque user data, initialised to 0 by the constructor.
void* m_userPtr;
};
///An iterator to navigate through the tree
///It tracks a current parent and a current child; the child accessors below
///dereference the current child, so they must not be used once Is_Done() is true.
class b3ProfileIterator
{
public:
// Access all the children of the current parent
void First(void);
void Next(void);
bool Is_Done(void);
// True when the current parent has no parent of its own.
bool Is_Root(void) { return (CurrentParent->Get_Parent() == 0); }
void Enter_Child( int index ); // Make the given child the new parent
// NOTE(review): no definition of Enter_Largest_Child is visible alongside the other members; confirm it exists before calling it.
void Enter_Largest_Child( void ); // Make the largest child the new parent
void Enter_Parent( void ); // Make the current parent's parent the new parent
// Access the current child
const char * Get_Current_Name( void ) { return CurrentChild->Get_Name(); }
int Get_Current_Total_Calls( void ) { return CurrentChild->Get_Total_Calls(); }
float Get_Current_Total_Time( void ) { return CurrentChild->Get_Total_Time(); }
void* Get_Current_UserPointer( void ) { return CurrentChild->GetUserPointer(); }
void Set_Current_UserPointer(void* ptr) {CurrentChild->SetUserPointer(ptr);}
// Access the current parent
const char * Get_Current_Parent_Name( void ) { return CurrentParent->Get_Name(); }
int Get_Current_Parent_Total_Calls( void ) { return CurrentParent->Get_Total_Calls(); }
float Get_Current_Parent_Total_Time( void ) { return CurrentParent->Get_Total_Time(); }
protected:
b3ProfileNode * CurrentParent;
b3ProfileNode * CurrentChild;
// Protected constructor: iterators are created through b3ProfileManager::Get_Iterator().
b3ProfileIterator( b3ProfileNode * start );
friend class b3ProfileManager;
};
///The Manager for the Profile system
///All members are static: there is one profile tree, rooted at Root.
class b3ProfileManager {
public:
// Enters the named profile node; names are compared by pointer.
static void Start_Profile( const char * name );
// Leaves the current profile node and moves back to its parent once it has fully unwound.
static void Stop_Profile( void );
// Deletes every node below the root.
static void CleanupMemory(void)
{
Root.CleanupMemory();
}
// Clears all timing data and the frame counter; the tree structure is kept.
static void Reset( void );
static void Increment_Frame_Counter( void );
static int Get_Frame_Count_Since_Reset( void ) { return FrameCounter; }
static float Get_Time_Since_Reset( void );
// Allocates an iterator positioned at the root; release it with Release_Iterator().
static b3ProfileIterator * Get_Iterator( void )
{
return new b3ProfileIterator( &Root );
}
static void Release_Iterator( b3ProfileIterator * iterator ) { delete ( iterator); }
// Dump the subtree under the iterator's current parent via b3Printf, indented by spacing dots.
static void dumpRecursive(b3ProfileIterator* profileIterator, int spacing);
static void dumpAll();
// Same as above, but written to the given FILE stream.
static void dumpRecursive(FILE* f, b3ProfileIterator* profileIterator, int spacing);
static void dumpAll(FILE* f);
private:
static b3ProfileNode Root;
// Node currently being profiled.
static b3ProfileNode * CurrentNode;
static int FrameCounter;
// Profile-clock tick value sampled by the last Reset().
static unsigned long int ResetTime;
};
///ProfileSampleClass is a simple way to profile a function's scope
///Use the B3_PROFILE macro at the start of scope to time
///Copying is disabled because a copy would call Stop_Profile a second time for a
///single Start_Profile call.
class b3ProfileSample {
public:
    ///@param name profile node name, forwarded unchanged to b3ProfileManager::Start_Profile
    explicit b3ProfileSample( const char * name )
    {
        b3ProfileManager::Start_Profile( name );
    }
    ~b3ProfileSample( void )
    {
        b3ProfileManager::Stop_Profile();
    }
private:
    // Declared but intentionally not defined: forbids copying without requiring C++11.
    b3ProfileSample( const b3ProfileSample& );
    b3ProfileSample& operator=( const b3ProfileSample& );
};
#define B3_PROFILE( name ) b3ProfileSample __profile( name )
#else
#define B3_PROFILE( name )
#endif //#ifndef B3_NO_PROFILE
#endif //B3_QUICK_PROF_H

View File

@@ -25,7 +25,7 @@ subject to the following restrictions:
#include "b3TypedConstraint.h"
#include <new>
#include "Bullet3Common/b3StackAlloc.h"
#include "Bullet3Common/b3Quickprof.h"
//#include "b3SolverBody.h"
//#include "b3SolverConstraint.h"
#include "Bullet3Common/b3AlignedObjectArray.h"

View File

@@ -2,7 +2,7 @@
#include "b3GpuSapBroadphase.h"
#include "Bullet3Common/b3Vector3.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
#include "Bullet3Common/b3Quickprof.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "kernels/sapKernels.h"
#include "kernels/sapFastKernels.h"

View File

@@ -28,7 +28,7 @@ int b3g_actualSATPairTests=0;
typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;
#include "Bullet3Common/b3Quickprof.h"
#include <float.h> //for FLT_MAX
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"

View File

@@ -319,10 +319,10 @@ void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sor
//fast prefix scan is not working properly on Mac OSX yet
#ifdef _WIN32
bool fastScan=!m_deviceCPU;//only use fast scan on GPU
#else
#ifdef __APPLE__
bool fastScan=false;
#else
bool fastScan=!m_deviceCPU;//only use fast scan on GPU
#endif
if (fastScan)
@@ -653,11 +653,10 @@ void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBi
//fast prefix scan is not working properly on Mac OSX yet
#ifdef _WIN32
bool fastScan=!m_deviceCPU;
#ifdef __APPLE__
bool fastScan=false;
#else
bool fastScan=false;
bool fastScan=!m_deviceCPU;
#endif
if (fastScan)

View File

@@ -2,7 +2,9 @@
#include "b3GpuRaycast.h"
#include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
#include "Bullet3Common/b3Quickprof.h"
#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
@@ -73,9 +75,57 @@ bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vect
return false;
}
bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronCL& poly,
const b3AlignedObjectArray<b3GpuFace>& faces, float& hitFraction, b3Vector3& hitNormal)
{
float exitFraction = hitFraction;
float enterFraction = -0.1f;
b3Vector3 curHitNormal(0,0,0);
for (int i=0;i<poly.m_numFaces;i++)
{
const b3GpuFace& face = faces[poly.m_faceOffset+i];
float fromPlaneDist = b3Dot(rayFromLocal,face.m_plane)+face.m_plane.w;
float toPlaneDist = b3Dot(rayToLocal,face.m_plane)+face.m_plane.w;
if (fromPlaneDist<0.f)
{
if (toPlaneDist >= 0.f)
{
float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);
if (exitFraction>fraction)
{
exitFraction = fraction;
}
}
} else
{
if (toPlaneDist<0.f)
{
float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);
if (enterFraction <= fraction)
{
enterFraction = fraction;
curHitNormal = face.m_plane;
curHitNormal.w = 0.f;
}
} else
{
return false;
}
}
if (exitFraction <= enterFraction)
return false;
}
if (enterFraction < 0.f)
return false;
hitFraction = enterFraction;
hitNormal = curHitNormal;
return true;
}
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables)
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
{
// return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
@@ -88,6 +138,7 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3A
float hitFraction = hitResults[r].m_hitFraction;
int hitBodyIndex= -1;
b3Vector3 hitNormal;
for (int b=0;b<numBodies;b++)
{
@@ -103,9 +154,34 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3A
if (sphere_intersect(pos, radius, rayFrom, rayTo,hitFraction))
{
hitBodyIndex = b;
b3Vector3 hitPoint;
hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
hitNormal = (hitPoint-bodies[b].m_pos).normalize();
}
}
case SHAPE_CONVEX_HULL:
{
b3Transform convexWorldTransform;
convexWorldTransform.setIdentity();
convexWorldTransform.setOrigin(bodies[b].m_pos);
convexWorldTransform.setRotation(bodies[b].m_quat);
b3Transform convexWorld2Local = convexWorldTransform.inverse();
b3Vector3 rayFromLocal = convexWorld2Local(rayFrom);
b3Vector3 rayToLocal = convexWorld2Local(rayTo);
int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex;
const b3ConvexPolyhedronCL& poly = narrowphaseData->m_convexPolyhedra[shapeIndex];
if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal))
{
hitBodyIndex = b;
}
break;
}
default:
{
static bool once=true;
@@ -122,7 +198,7 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3A
hitResults[r].m_hitFraction = hitFraction;
hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
hitResults[r].m_hitNormal = (hitResults[r].m_hitPoint-bodies[hitBodyIndex].m_pos).normalize();
hitResults[r].m_hitNormal = hitNormal;
hitResults[r].m_hitResult0 = hitBodyIndex;
}
@@ -130,8 +206,9 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3A
}
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables)
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
{
B3_PROFILE("castRaysGPU");
b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
@@ -141,14 +218,6 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3Align
gpuHitResults.resize(hitResults.size());
gpuHitResults.copyFromHost(hitResults);
b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q);
gpuBodies.resize(numBodies);
gpuBodies.copyFromHostPointer(bodies,numBodies);
b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q);
gpuCollidables.resize(numCollidables);
gpuCollidables.copyFromHostPointer(collidables,numCollidables);
//run kernel
{
@@ -162,9 +231,11 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3Align
launcher.setBuffer(gpuHitResults.getBufferCL());
launcher.setConst(numBodies);
launcher.setBuffer(gpuBodies.getBufferCL());
launcher.setBuffer(gpuCollidables.getBufferCL());
launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
launcher.launch1D(numRays);
clFinish(m_data->m_q);
}

View File

@@ -18,10 +18,13 @@ public:
virtual ~b3GpuRaycast();
void castRaysHost(const b3AlignedObjectArray<b3RayInfo>& raysIn, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies, const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables);
int numBodies, const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables,
const struct b3GpuNarrowPhaseInternalData* narrowphaseData);
void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables);
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables,
const struct b3GpuNarrowPhaseInternalData* narrowphaseData
);
/* const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,

View File

@@ -47,6 +47,184 @@ typedef struct Collidable
} Collidable;
// Per-shape convex polyhedron record, read by rayCastKernel through its
// convexShapes argument. The host binds a b3OpenCLArray<b3ConvexPolyhedronCL>
// to that argument, so field order and sizes here have to stay in step with
// the host-side struct.
typedef struct
{
float4 m_localCenter;
float4 m_extents;
float4 mC;
float4 mE;
float m_radius;
// Index of this shape's first entry in the shared faces buffer
// (rayConvex reads faces[faceOffset+i]).
int m_faceOffset;
// Number of consecutive faces, starting at m_faceOffset, that belong to this shape.
int m_numFaces;
int m_numVertices;
// NOTE(review): the remaining offsets/counts are not read by the ray cast code
// in this file; presumably they index the vertex and unique-edge buffers — confirm.
int m_vertexOffset;
int m_uniqueEdgesOffset;
int m_numUniqueEdges;
int m_unused;
} ConvexPolyhedronCL;
// One face of a convex shape, as stored in the faces buffer handed to rayCastKernel.
typedef struct
{
// Plane of the face: rayConvex evaluates dot(point, m_plane) + m_plane.w with
// point.w forced to 0, i.e. xyz acts as the plane normal and w as the offset.
float4 m_plane;
// NOTE(review): not read by the ray cast code in this file; presumably a range
// into the convex index buffer — confirm.
int m_indexOffset;
int m_numIndices;
} b3GpuFace;
///////////////////////////////////////
// Quaternion
///////////////////////////////////////
// A quaternion is carried in a float4: xyz is the vector part and w the scalar
// part (qtInvert negates xyz and keeps w; qtMul computes w separately).
typedef float4 Quaternion;
// Prototypes for the quaternion helpers, so they can call each other
// regardless of definition order (qtRotate uses qtInvert, defined after it).
__inline
Quaternion qtMul(Quaternion a, Quaternion b);
__inline
Quaternion qtNormalize(Quaternion in);
__inline
float4 qtRotate(Quaternion q, float4 vec);
__inline
Quaternion qtInvert(Quaternion q);
// Three-component dot product of two float4 values: the w lane of each operand
// is replaced by zero before the built-in dot(), so only x, y and z contribute.
__inline
float dot3F4(float4 a, float4 b)
{
	return dot((float4)(a.xyz, 0.f), (float4)(b.xyz, 0.f));
}
// Quaternion product a*b (xyz = vector part, w = scalar part).
__inline
Quaternion qtMul(Quaternion a, Quaternion b)
{
	// Vector part: a.xyz x b.xyz + a.w*b.xyz + b.w*a.xyz. The w lane produced
	// here is a scratch value and is overwritten on the next line.
	Quaternion product = cross( a, b ) + (a.w*b + b.w*a);
	// Scalar part: a.w*b.w - (a.xyz . b.xyz).
	product.w = a.w*b.w - dot3F4(a, b);
	return product;
}
// Returns the quaternion scaled to unit length, using the built-in
// fast_normalize() instead of an explicit division by length().
__inline
Quaternion qtNormalize(Quaternion in)
{
	const Quaternion unit = fast_normalize(in);
	return unit;
}
// Rotates vec by quaternion q, computed as q * (vec.xyz, 0) * qtInvert(q).
// The incoming vec.w is ignored.
__inline
float4 qtRotate(Quaternion q, float4 vec)
{
	// Treat the vector as a quaternion with zero scalar part.
	const float4 pureVec = (float4)(vec.xyz, 0.f);
	return qtMul(qtMul(q, pureVec), qtInvert( q ));
}
// Returns q with its vector part negated and its scalar part unchanged.
__inline
Quaternion qtInvert(Quaternion q)
{
	Quaternion flipped = q;
	flipped.xyz = -q.xyz;
	return flipped;
}
// Rotates vec by the inverse of q: applies qtRotate with qtInvert(q).
__inline
float4 qtInvRotate(const Quaternion q, float4 vec)
{
	const Quaternion qInv = qtInvert( q );
	return qtRotate( qInv, vec );
}
// Applies a rigid transform to a point: rotates *p by *orientation, then adds
// *translation (component-wise, including the w lane).
__inline
float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)
{
	const float4 rotated = qtRotate( orientation[0], p[0] );
	return rotated + translation[0];
}
// Inverts the rigid transform (translationIn, orientationIn).
// Writes the inverted orientation to *orientationOut, then the negated input
// translation rotated by that inverted orientation to *translationOut.
void trInverse(float4 translationIn, Quaternion orientationIn,
	float4* translationOut, Quaternion* orientationOut)
{
	const Quaternion invOrn = qtInvert(orientationIn);
	*orientationOut = invOrn;
	*translationOut = qtRotate(invOrn, -translationIn);
}
// Composes two rigid transforms, A applied after B.
// *orientationOut receives orientationA * orientationB; *translationOut
// receives translationB rotated by orientationA plus translationA.
void trMul(float4 translationA, Quaternion orientationA,
	float4 translationB, Quaternion orientationB,
	float4* translationOut, Quaternion* orientationOut)
{
	const Quaternion combinedOrn = qtMul(orientationA, orientationB);
	*orientationOut = combinedOrn;
	// Same computation transform(&translationB, &translationA, &orientationA) performs.
	*translationOut = qtRotate(orientationA, translationB) + translationA;
}
// Intersects the segment rayFromLocal -> rayToLocal (expressed in the shape's
// local frame) with a convex shape given as a run of face planes.
//
//   numFaces, faceOffset : the shape's faces are faces[faceOffset .. faceOffset+numFaces-1]
//   hitFraction (in/out) : on entry, the largest fraction still of interest (it
//                          seeds the exit fraction); on a hit, overwritten with
//                          the fraction along the segment where it enters the shape
//   hitNormal (out)      : on a hit, the plane normal (w = 0, local frame) of the
//                          face through which the segment enters
//
// Returns true only when the segment enters the shape at a fraction in
// [0, *hitFraction). Nothing is written through the pointers on a miss.
bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,
	__global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)
{
	// Zero w so the 4-component dot() below acts as a 3-component dot product.
	rayFromLocal.w = 0.f;
	rayToLocal.w = 0.f;
	bool result = true;

	float exitFraction = *hitFraction;
	// Negative sentinel: stays negative until some plane is crossed from outside to inside.
	float enterFraction = -0.1f;
	float4 curHitNormal = (float4)(0,0,0,0);
	for (int i=0;i<numFaces && result;i++)
	{
		b3GpuFace face = faces[faceOffset+i];
		// Signed values of the plane equation at both end points (negative = behind the face).
		float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w;
		float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w;
		if (fromPlaneDist<0.f)
		{
			if (toPlaneDist >= 0.f)
			{
				// Segment leaves through this plane: keep the earliest exit.
				float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);
				if (exitFraction>fraction)
				{
					exitFraction = fraction;
				}
			}
		} else
		{
			if (toPlaneDist<0.f)
			{
				// Segment enters through this plane: keep the latest entry and its normal.
				float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);
				if (enterFraction <= fraction)
				{
					enterFraction = fraction;
					curHitNormal = face.m_plane;
					curHitNormal.w = 0.f;
				}
			} else
			{
				// Both end points are in front of this face: the segment misses the shape.
				result = false;
			}
		}
		// The segment left the shape before it had entered through every plane.
		if (exitFraction <= enterFraction)
			result = false;
	}

	// A hit requires that an entering plane was actually found. The previous
	// test (enterFraction < 0.f) was inverted: it accepted only the no-entry
	// case and then reported the negative sentinel as the hit fraction. The
	// host implementation rejects enterFraction < 0.f.
	result = result && (enterFraction >= 0.f);

	if (result)
	{
		*hitFraction = enterFraction;
		*hitNormal = curHitNormal;
	}
	return result;
}
bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction)
{
@@ -88,10 +266,11 @@ __kernel void rayCastKernel(
__global b3RayHit* hitResults,
const int numBodies,
__global Body* bodies,
__global Collidable* collidables)
__global Collidable* collidables,
__global const b3GpuFace* faces,
__global const ConvexPolyhedronCL* convexShapes )
{
int i = get_global_id(0);
if (i<numRays)
{
@@ -100,6 +279,8 @@ __kernel void rayCastKernel(
float4 rayFrom = rays[i].m_from;
float4 rayTo = rays[i].m_to;
float hitFraction = 1.f;
float4 hitPoint;
float4 hitNormal;
int hitBodyIndex= -1;
int cachedCollidableIndex = -1;
@@ -109,7 +290,7 @@ __kernel void rayCastKernel(
{
float4 pos = bodies[b].m_pos;
// float4 orn = bodies[b].m_quat;
float4 orn = bodies[b].m_quat;
if (cachedCollidableIndex !=bodies[b].m_collidableIdx)
{
cachedCollidableIndex = bodies[b].m_collidableIdx;
@@ -123,15 +304,38 @@ __kernel void rayCastKernel(
if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))
{
hitBodyIndex = b;
hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);
hitNormal = (float4) (hitPoint-bodies[b].m_pos);
}
}
if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL)
{
float4 invPos = (float4)(0,0,0,0);
float4 invOrn = (float4)(0,0,0,0);
float4 rayFromLocal = (float4)(0,0,0,0);
float4 rayToLocal = (float4)(0,0,0,0);
trInverse(pos,orn, &invPos, &invOrn);
rayFromLocal = transform(&rayFrom, &invPos, &invOrn);
rayToLocal = transform(&rayTo, &invPos, &invOrn);
int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces;
int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset;
if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))
{
hitBodyIndex = b;
}
}
}
if (hitBodyIndex>=0)
{
hitResults[i].m_hitFraction = hitFraction;
hitResults[i].m_hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);
float4 hitNormal = (float4) (hitResults[i].m_hitPoint-bodies[hitBodyIndex].m_pos);
hitResults[i].m_hitPoint = hitPoint;
hitResults[i].m_hitNormal = normalize(hitNormal);
hitResults[i].m_hitResult0 = hitBodyIndex;
}

View File

@@ -49,6 +49,184 @@ static const char* rayCastKernelCL= \
"} Collidable;\n"
"\n"
"\n"
"typedef struct \n"
"{\n"
" float4 m_localCenter;\n"
" float4 m_extents;\n"
" float4 mC;\n"
" float4 mE;\n"
" \n"
" float m_radius;\n"
" int m_faceOffset;\n"
" int m_numFaces;\n"
" int m_numVertices;\n"
" \n"
" int m_vertexOffset;\n"
" int m_uniqueEdgesOffset;\n"
" int m_numUniqueEdges;\n"
" int m_unused;\n"
"\n"
"} ConvexPolyhedronCL;\n"
"\n"
"typedef struct\n"
"{\n"
" float4 m_plane;\n"
" int m_indexOffset;\n"
" int m_numIndices;\n"
"} b3GpuFace;\n"
"\n"
"\n"
"\n"
"///////////////////////////////////////\n"
"// Quaternion\n"
"///////////////////////////////////////\n"
"\n"
"typedef float4 Quaternion;\n"
"\n"
"__inline\n"
"Quaternion qtMul(Quaternion a, Quaternion b);\n"
"\n"
"__inline\n"
"Quaternion qtNormalize(Quaternion in);\n"
"\n"
"__inline\n"
"float4 qtRotate(Quaternion q, float4 vec);\n"
"\n"
"__inline\n"
"Quaternion qtInvert(Quaternion q);\n"
"\n"
"\n"
"__inline\n"
"float dot3F4(float4 a, float4 b)\n"
"{\n"
" float4 a1 = (float4)(a.xyz,0.f);\n"
" float4 b1 = (float4)(b.xyz,0.f);\n"
" return dot(a1, b1);\n"
"}\n"
"\n"
"\n"
"__inline\n"
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
"{\n"
" Quaternion ans;\n"
" ans = cross( a, b );\n"
" ans += a.w*b+b.w*a;\n"
"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
" ans.w = a.w*b.w - dot3F4(a, b);\n"
" return ans;\n"
"}\n"
"\n"
"__inline\n"
"Quaternion qtNormalize(Quaternion in)\n"
"{\n"
" return fast_normalize(in);\n"
"// in /= length( in );\n"
"// return in;\n"
"}\n"
"__inline\n"
"float4 qtRotate(Quaternion q, float4 vec)\n"
"{\n"
" Quaternion qInv = qtInvert( q );\n"
" float4 vcpy = vec;\n"
" vcpy.w = 0.f;\n"
" float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
" return out;\n"
"}\n"
"\n"
"__inline\n"
"Quaternion qtInvert(Quaternion q)\n"
"{\n"
" return (Quaternion)(-q.xyz, q.w);\n"
"}\n"
"\n"
"__inline\n"
"float4 qtInvRotate(const Quaternion q, float4 vec)\n"
"{\n"
" return qtRotate( qtInvert( q ), vec );\n"
"}\n"
"\n"
"__inline\n"
"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
"{\n"
" return qtRotate( *orientation, *p ) + (*translation);\n"
"}\n"
"\n"
"void trInverse(float4 translationIn, Quaternion orientationIn,\n"
" float4* translationOut, Quaternion* orientationOut)\n"
"{\n"
" *orientationOut = qtInvert(orientationIn);\n"
" *translationOut = qtRotate(*orientationOut, -translationIn);\n"
"}\n"
"\n"
"void trMul(float4 translationA, Quaternion orientationA,\n"
" float4 translationB, Quaternion orientationB,\n"
" float4* translationOut, Quaternion* orientationOut)\n"
"{\n"
" *orientationOut = qtMul(orientationA,orientationB);\n"
" *translationOut = transform(&translationB,&translationA,&orientationA);\n"
"}\n"
"\n"
"\n"
"\n"
// Embedded source of the OpenCL rayConvex() helper. The final acceptance test
// must require enterFraction >= 0 (an entering plane was found); the previous
// "< 0.f" test was inverted relative to the host implementation of rayConvex.
"bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,\n"
" __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)\n"
"{\n"
" rayFromLocal.w = 0.f;\n"
" rayToLocal.w = 0.f;\n"
" bool result = true;\n"
" \n"
" float exitFraction = *hitFraction;\n"
" float enterFraction = -0.1f;\n"
" float4 curHitNormal = (float4)(0,0,0,0);\n"
" for (int i=0;i<numFaces && result;i++)\n"
" {\n"
" b3GpuFace face = faces[faceOffset+i];\n"
" float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w;\n"
" float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w;\n"
" if (fromPlaneDist<0.f)\n"
" {\n"
" if (toPlaneDist >= 0.f)\n"
" {\n"
" float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n"
" if (exitFraction>fraction)\n"
" {\n"
" exitFraction = fraction;\n"
" }\n"
" } \n"
" } else\n"
" {\n"
" if (toPlaneDist<0.f)\n"
" {\n"
" float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n"
" if (enterFraction <= fraction)\n"
" {\n"
" enterFraction = fraction;\n"
" curHitNormal = face.m_plane;\n"
" curHitNormal.w = 0.f;\n"
" }\n"
" } else\n"
" {\n"
" result = false;\n"
" }\n"
" }\n"
" if (exitFraction <= enterFraction)\n"
" result = false;\n"
" }\n"
" \n"
" result = result && (enterFraction >= 0.f);\n"
" \n"
" if (result)\n"
" { \n"
" *hitFraction = enterFraction;\n"
" *hitNormal = curHitNormal;\n"
" }\n"
" return result;\n"
"}\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction)\n"
"{\n"
@@ -90,10 +268,11 @@ static const char* rayCastKernelCL= \
" __global b3RayHit* hitResults, \n"
" const int numBodies, \n"
" __global Body* bodies,\n"
" __global Collidable* collidables)\n"
" __global Collidable* collidables,\n"
" __global const b3GpuFace* faces,\n"
" __global const ConvexPolyhedronCL* convexShapes )\n"
"{\n"
"\n"
"\n"
" int i = get_global_id(0);\n"
" if (i<numRays)\n"
" {\n"
@@ -102,6 +281,8 @@ static const char* rayCastKernelCL= \
" float4 rayFrom = rays[i].m_from;\n"
" float4 rayTo = rays[i].m_to;\n"
" float hitFraction = 1.f;\n"
" float4 hitPoint;\n"
" float4 hitNormal;\n"
" int hitBodyIndex= -1;\n"
" \n"
" int cachedCollidableIndex = -1; \n"
@@ -111,7 +292,7 @@ static const char* rayCastKernelCL= \
" {\n"
" \n"
" float4 pos = bodies[b].m_pos;\n"
" // float4 orn = bodies[b].m_quat;\n"
" float4 orn = bodies[b].m_quat;\n"
" if (cachedCollidableIndex !=bodies[b].m_collidableIdx)\n"
" {\n"
" cachedCollidableIndex = bodies[b].m_collidableIdx;\n"
@@ -125,15 +306,38 @@ static const char* rayCastKernelCL= \
" if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n"
" {\n"
" hitBodyIndex = b;\n"
" hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n"
" hitNormal = (float4) (hitPoint-bodies[b].m_pos);\n"
" }\n"
" }\n"
" \n"
" if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n"
" {\n"
" \n"
" float4 invPos = (float4)(0,0,0,0);\n"
" float4 invOrn = (float4)(0,0,0,0);\n"
" float4 rayFromLocal = (float4)(0,0,0,0);\n"
" float4 rayToLocal = (float4)(0,0,0,0);\n"
" \n"
" trInverse(pos,orn, &invPos, &invOrn);\n"
" rayFromLocal = transform(&rayFrom, &invPos, &invOrn);\n"
" rayToLocal = transform(&rayTo, &invPos, &invOrn);\n"
" \n"
" int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces;\n"
" int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset;\n"
" \n"
" if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n"
" {\n"
" hitBodyIndex = b;\n"
" }\n"
" }\n"
" \n"
" }\n"
" \n"
" if (hitBodyIndex>=0)\n"
" {\n"
" hitResults[i].m_hitFraction = hitFraction;\n"
" hitResults[i].m_hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n"
" float4 hitNormal = (float4) (hitResults[i].m_hitPoint-bodies[hitBodyIndex].m_pos);\n"
" hitResults[i].m_hitPoint = hitPoint;\n"
" hitResults[i].m_hitNormal = normalize(hitNormal);\n"
" hitResults[i].m_hitResult0 = hitBodyIndex;\n"
" }\n"

View File

@@ -19,7 +19,7 @@ struct b3Config
int m_maxTriConvexPairCapacity;
b3Config()
:m_maxConvexBodies(128*1024),
:m_maxConvexBodies(32*1024),
m_maxVerticesPerFace(64),
m_maxFacesPerShape(12),
m_maxConvexVertices(8192),
@@ -29,7 +29,7 @@ struct b3Config
m_maxTriConvexPairCapacity(256*1024)
{
m_maxConvexShapes = m_maxConvexBodies;
m_maxBroadphasePairs = 8*m_maxConvexBodies;
m_maxBroadphasePairs = 12*m_maxConvexBodies;
m_maxContactCapacity = m_maxBroadphasePairs;
}
};

View File

@@ -5,7 +5,7 @@ bool b3GpuSolveConstraint = true;
#include "b3GpuBatchingPgsSolver.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
#include "Bullet3Common/b3Quickprof.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"

View File

@@ -12,73 +12,7 @@
#include "Bullet3Geometry/b3AabbUtil.h"
#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h"
// Aggregate of the narrowphase's working data. Most kinds of data appear twice:
// as b3AlignedObjectArray members (names without suffix or ending in CPU) and as
// b3OpenCLArray members (names ending in GPU).
struct b3GpuNarrowPhaseInternalData
{
b3AlignedObjectArray<b3ConvexUtility*>* m_convexData;
// Convex shape records and their geometry.
b3AlignedObjectArray<b3ConvexPolyhedronCL> m_convexPolyhedra;
b3AlignedObjectArray<b3Vector3> m_uniqueEdges;
b3AlignedObjectArray<b3Vector3> m_convexVertices;
b3AlignedObjectArray<int> m_convexIndices;
b3OpenCLArray<b3ConvexPolyhedronCL>* m_convexPolyhedraGPU;
b3OpenCLArray<b3Vector3>* m_uniqueEdgesGPU;
b3OpenCLArray<b3Vector3>* m_convexVerticesGPU;
b3OpenCLArray<int>* m_convexIndicesGPU;
b3OpenCLArray<b3Vector3>* m_worldVertsB1GPU;
b3OpenCLArray<b3Int4>* m_clippingFacesOutGPU;
b3OpenCLArray<b3Vector3>* m_worldNormalsAGPU;
b3OpenCLArray<b3Vector3>* m_worldVertsA1GPU;
b3OpenCLArray<b3Vector3>* m_worldVertsB2GPU;
b3AlignedObjectArray<b3GpuChildShape> m_cpuChildShapes;
b3OpenCLArray<b3GpuChildShape>* m_gpuChildShapes;
// Face records of all convex shapes.
b3AlignedObjectArray<b3GpuFace> m_convexFaces;
b3OpenCLArray<b3GpuFace>* m_convexFacesGPU;
GpuSatCollision* m_gpuSatCollision;
b3AlignedObjectArray<b3Int2>* m_pBufPairsCPU;
//b3OpenCLArray<b3Int2>* m_convexPairsOutGPU;
//b3OpenCLArray<b3Int2>* m_planePairs;
b3OpenCLArray<b3Contact4>* m_pBufContactOutGPU;
b3AlignedObjectArray<b3Contact4>* m_pBufContactOutCPU;
// Rigid bodies and their inertia data.
b3AlignedObjectArray<b3RigidBodyCL>* m_bodyBufferCPU;
b3OpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
b3AlignedObjectArray<b3InertiaCL>* m_inertiaBufferCPU;
b3OpenCLArray<b3InertiaCL>* m_inertiaBufferGPU;
int m_numAcceleratedShapes;
int m_numAcceleratedRigidBodies;
// Collidables and their local-space AABBs.
b3AlignedObjectArray<b3Collidable> m_collidablesCPU;
b3OpenCLArray<b3Collidable>* m_collidablesGPU;
b3OpenCLArray<b3SapAabb>* m_localShapeAABBGPU;
b3AlignedObjectArray<b3SapAabb>* m_localShapeAABBCPU;
// BVH data.
b3AlignedObjectArray<class b3OptimizedBvh*> m_bvhData;
b3AlignedObjectArray<b3QuantizedBvhNode> m_treeNodesCPU;
b3AlignedObjectArray<b3BvhSubtreeInfo> m_subTreesCPU;
b3AlignedObjectArray<b3BvhInfo> m_bvhInfoCPU;
b3OpenCLArray<b3BvhInfo>* m_bvhInfoGPU;
b3OpenCLArray<b3QuantizedBvhNode>* m_treeNodesGPU;
b3OpenCLArray<b3BvhSubtreeInfo>* m_subTreesGPU;
b3Config m_config;
};
#include "b3GpuNarrowPhaseInternalData.h"

View File

@@ -91,6 +91,11 @@ public:
b3Collidable& getCollidableCpu(int collidableIndex);
const b3Collidable& getCollidableCpu(int collidableIndex) const;
// Read-only access to the narrowphase's internal data block (returns m_data).
// b3GpuRigidBodyPipeline::castRays passes the result on to the raycaster.
const b3GpuNarrowPhaseInternalData* getInternalData() const
{
return m_data;
}
const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const;
};

View File

@@ -0,0 +1,93 @@
#ifndef B3_GPU_NARROWPHASE_INTERNAL_DATA_H
#define B3_GPU_NARROWPHASE_INTERNAL_DATA_H
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
#include "Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h"
#include "b3Config.h"
#include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
#include "Bullet3Common/b3AlignedObjectArray.h"
#include "Bullet3Common/b3Vector3.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
#include "Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h"
#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h"
#include "Bullet3Common/b3Int4.h"
#include "Bullet3Common/b3Int2.h"
class b3ConvexUtility;
// Aggregate of the narrowphase's working data, shared with other GPU stages
// through a const pointer. Most kinds of data appear twice: as
// b3AlignedObjectArray members (names without suffix or ending in CPU) and as
// b3OpenCLArray members (names ending in GPU).
//
// The ray caster reads m_convexPolyhedra and m_convexFaces in castRaysHost, and
// binds m_bodyBufferGPU, m_collidablesGPU, m_convexFacesGPU and
// m_convexPolyhedraGPU as kernel buffers in castRays.
struct b3GpuNarrowPhaseInternalData
{
b3AlignedObjectArray<b3ConvexUtility*>* m_convexData;
// Convex shape records and their geometry.
b3AlignedObjectArray<b3ConvexPolyhedronCL> m_convexPolyhedra;
b3AlignedObjectArray<b3Vector3> m_uniqueEdges;
b3AlignedObjectArray<b3Vector3> m_convexVertices;
b3AlignedObjectArray<int> m_convexIndices;
b3OpenCLArray<b3ConvexPolyhedronCL>* m_convexPolyhedraGPU;
b3OpenCLArray<b3Vector3>* m_uniqueEdgesGPU;
b3OpenCLArray<b3Vector3>* m_convexVerticesGPU;
b3OpenCLArray<int>* m_convexIndicesGPU;
b3OpenCLArray<b3Vector3>* m_worldVertsB1GPU;
b3OpenCLArray<b3Int4>* m_clippingFacesOutGPU;
b3OpenCLArray<b3Vector3>* m_worldNormalsAGPU;
b3OpenCLArray<b3Vector3>* m_worldVertsA1GPU;
b3OpenCLArray<b3Vector3>* m_worldVertsB2GPU;
b3AlignedObjectArray<b3GpuChildShape> m_cpuChildShapes;
b3OpenCLArray<b3GpuChildShape>* m_gpuChildShapes;
// Face records of all convex shapes; a shape's faces are addressed through the
// m_faceOffset / m_numFaces fields of its polyhedron record.
b3AlignedObjectArray<b3GpuFace> m_convexFaces;
b3OpenCLArray<b3GpuFace>* m_convexFacesGPU;
struct GpuSatCollision* m_gpuSatCollision;
b3AlignedObjectArray<b3Int2>* m_pBufPairsCPU;
//b3OpenCLArray<b3Int2>* m_convexPairsOutGPU;
//b3OpenCLArray<b3Int2>* m_planePairs;
b3OpenCLArray<b3Contact4>* m_pBufContactOutGPU;
b3AlignedObjectArray<b3Contact4>* m_pBufContactOutCPU;
// Rigid bodies and their inertia data.
b3AlignedObjectArray<b3RigidBodyCL>* m_bodyBufferCPU;
b3OpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
b3AlignedObjectArray<b3InertiaCL>* m_inertiaBufferCPU;
b3OpenCLArray<b3InertiaCL>* m_inertiaBufferGPU;
int m_numAcceleratedShapes;
int m_numAcceleratedRigidBodies;
// Collidables and their local-space AABBs.
b3AlignedObjectArray<b3Collidable> m_collidablesCPU;
b3OpenCLArray<b3Collidable>* m_collidablesGPU;
b3OpenCLArray<b3SapAabb>* m_localShapeAABBGPU;
b3AlignedObjectArray<b3SapAabb>* m_localShapeAABBCPU;
// BVH data.
b3AlignedObjectArray<class b3OptimizedBvh*> m_bvhData;
b3AlignedObjectArray<b3QuantizedBvhNode> m_treeNodesCPU;
b3AlignedObjectArray<b3BvhSubtreeInfo> m_subTreesCPU;
b3AlignedObjectArray<b3BvhInfo> m_bvhInfoCPU;
b3OpenCLArray<b3BvhInfo>* m_bvhInfoGPU;
b3OpenCLArray<b3QuantizedBvhNode>* m_treeNodesGPU;
b3OpenCLArray<b3BvhSubtreeInfo>* m_subTreesGPU;
b3Config m_config;
};
#endif //B3_GPU_NARROWPHASE_INTERNAL_DATA_H

View File

@@ -31,7 +31,6 @@ bool dumpContactStats = false;
#include "b3GpuBatchingPgsSolver.h"
#include "b3Solver.h"
#include "Bullet3Common/b3Quickprof.h"
#include "b3Config.h"
#include "Bullet3OpenCL/Raycast/b3GpuRaycast.h"
@@ -458,6 +457,9 @@ int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* po
void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults)
{
this->m_data->m_raycaster->castRays(rays,hitResults,getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(),m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu());
this->m_data->m_raycaster->castRays(rays,hitResults,
getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(),
m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getInternalData()
);
}

View File

@@ -37,7 +37,6 @@ bool useNewBatchingKernel = true;
#include "kernels/batchingKernelsNew.h"
#include "Bullet3Common/b3Quickprof.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
#include "Bullet3Common/b3Vector3.h"