Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80
This commit is contained in:
762
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.inl
Normal file
762
Extras/RigidBodyGpuPipeline/dynamics/basic_demo/Stubs/Solver.inl
Normal file
@@ -0,0 +1,762 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
// Relative paths of the kernel source files handed to Device::getKernel().
#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\SolverKernels"
#define BATCHING_PATH "..\\..\\dynamics\\basic_demo\\Stubs\\batchingKernels"

// Kernel entry-point names looked up in the program found at PATH.
// KERNEL1 is defined for completeness but not referenced in this file.
#define KERNEL1 "SingleBatchSolveKernel"
#define KERNEL2 "BatchSolveKernel"

#define KERNEL3 "ContactToConstraintKernel"
#define KERNEL4 "SetSortDataKernel"
#define KERNEL5 "ReorderContactKernel"
|
||||
#include "SolverKernels.h"
|
||||
|
||||
#include "batchingKernels.h"
|
||||
|
||||
|
||||
// Per-work-item scratch record used only by the optional debug paths
// (DEBUG_ME in solveContactConstraint, BATCH_DEBUG in batchContacts).
// The host zero-fills an array of these, uploads it as an extra kernel
// buffer, reads it back after the launch and inspects individual slots.
// The meaning of each slot is decided by the kernel under investigation.
struct SolverDebugInfo
{
    // General-purpose integer slots.
    int m_valInt0;
    int m_valInt1;
    int m_valInt2;
    int m_valInt3;

    int m_valInt4;
    int m_valInt5;
    int m_valInt6;
    int m_valInt7;

    int m_valInt8;
    int m_valInt9;
    int m_valInt10;
    int m_valInt11;

    int m_valInt12;
    int m_valInt13;
    int m_valInt14;
    int m_valInt15;

    // General-purpose float slots.
    float m_val0;
    float m_val1;
    float m_val2;
    float m_val3;
};
|
||||
|
||||
|
||||
|
||||
|
||||
class SolverDeviceInl
|
||||
{
|
||||
public:
|
||||
struct ParallelSolveData
|
||||
{
|
||||
Buffer<u32>* m_numConstraints;
|
||||
Buffer<u32>* m_offsets;
|
||||
};
|
||||
};
|
||||
|
||||
template<DeviceType TYPE>
|
||||
typename Solver<TYPE>::Data* Solver<TYPE>::allocate( const Device* device, int pairCapacity )
|
||||
{
|
||||
const char* src[] =
|
||||
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
|
||||
{solverKernelsCL, 0};
|
||||
#else
|
||||
{0,0};
|
||||
#endif
|
||||
|
||||
const char* src2[] =
|
||||
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
|
||||
{batchingKernelsCL, 0};
|
||||
#else
|
||||
{0,0};
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
Data* data = new Data;
|
||||
data->m_device = device;
|
||||
bool cacheBatchingKernel = true;
|
||||
data->m_batchingKernel = device->getKernel( BATCHING_PATH, "CreateBatches", "-I ..\\..\\ ", src2[TYPE],cacheBatchingKernel);
|
||||
//data->m_batchingKernel = device->getKernel( BATCHING_PATH, "CreateBatches", "-I ..\\..\\ ", 0,cacheBatchingKernel);
|
||||
bool cacheSolverKernel = true;
|
||||
|
||||
data->m_batchSolveKernel = device->getKernel( PATH, KERNEL2, "-I ..\\..\\ ", src[TYPE],cacheSolverKernel );
|
||||
data->m_contactToConstraintKernel = device->getKernel( PATH, KERNEL3,
|
||||
"-I ..\\..\\ ", src[TYPE] );
|
||||
data->m_setSortDataKernel = device->getKernel( PATH, KERNEL4,
|
||||
"-I ..\\..\\ ", src[TYPE] );
|
||||
data->m_reorderContactKernel = device->getKernel( PATH, KERNEL5,
|
||||
"-I ..\\..\\ ", src[TYPE] );
|
||||
|
||||
data->m_copyConstraintKernel = device->getKernel( PATH, "CopyConstraintKernel",
|
||||
"-I ..\\..\\ ", src[TYPE] );
|
||||
|
||||
data->m_parallelSolveData = new SolverDeviceInl::ParallelSolveData;
|
||||
{
|
||||
SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
solveData->m_numConstraints = new Buffer<u32>( device, N_SPLIT*N_SPLIT );
|
||||
solveData->m_offsets = new Buffer<u32>( device, N_SPLIT*N_SPLIT );
|
||||
}
|
||||
const int sortSize = NEXTMULTIPLEOF( pairCapacity, 512 );
|
||||
|
||||
|
||||
//data->m_sort = RadixSort<TYPE>::allocate( data->m_device, sortSize );//todo. remove hardcode this
|
||||
data->m_sort32 = RadixSort32<TYPE>::allocate( data->m_device, sortSize );//todo. remove hardcode this
|
||||
|
||||
data->m_search = BoundSearch<TYPE>::allocate( data->m_device, N_SPLIT*N_SPLIT );
|
||||
data->m_scan = PrefixScan<TYPE>::allocate( data->m_device, N_SPLIT*N_SPLIT );
|
||||
|
||||
data->m_sortDataBuffer = new Buffer<SortData>( data->m_device, sortSize );
|
||||
|
||||
if( pairCapacity < DYNAMIC_CONTACT_ALLOCATION_THRESHOLD )
|
||||
data->m_contactBuffer = new Buffer<Contact4>( data->m_device, pairCapacity );
|
||||
else
|
||||
data->m_contactBuffer = 0;
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::deallocate( Data* data )
|
||||
{
|
||||
{
|
||||
SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
delete solveData->m_numConstraints;
|
||||
delete solveData->m_offsets;
|
||||
delete solveData;
|
||||
}
|
||||
|
||||
// RadixSort<TYPE>::deallocate( data->m_sort );
|
||||
RadixSort32<TYPE>::deallocate(data->m_sort32);
|
||||
BoundSearch<TYPE>::deallocate( data->m_search );
|
||||
PrefixScan<TYPE>::deallocate( data->m_scan );
|
||||
|
||||
delete data->m_sortDataBuffer;
|
||||
if( data->m_contactBuffer ) delete data->m_contactBuffer;
|
||||
|
||||
delete data;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::reorderConvertToConstraints( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
||||
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
|
||||
int nContacts, const typename Solver<TYPE>::ConstraintCfg& cfg )
|
||||
{
|
||||
if( data->m_contactBuffer )
|
||||
{
|
||||
if( data->m_contactBuffer->getSize() < nContacts )
|
||||
{
|
||||
BT_PROFILE("delete data->m_contactBuffer;");
|
||||
delete data->m_contactBuffer;
|
||||
data->m_contactBuffer = 0;
|
||||
}
|
||||
}
|
||||
if( data->m_contactBuffer == 0 )
|
||||
{
|
||||
BT_PROFILE("new data->m_contactBuffer;");
|
||||
|
||||
data->m_contactBuffer = new Buffer<Contact4>( data->m_device, nContacts );
|
||||
}
|
||||
Stopwatch sw;
|
||||
|
||||
Buffer<Contact4>* contactNative = BufferUtils::map<TYPE_CL, true>( data->m_device, contactsIn, nContacts );
|
||||
|
||||
//DeviceUtils::Config dhCfg;
|
||||
//Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
||||
if( cfg.m_enableParallelSolve )
|
||||
{
|
||||
SolverDeviceInl::ParallelSolveData* nativeSolveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
sw.start();
|
||||
// contactsIn -> data->m_contactBuffer
|
||||
{
|
||||
BT_PROFILE("sortContacts");
|
||||
Solver<TYPE>::sortContacts( data, bodyBuf, contactNative, additionalData, nContacts, cfg );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
sw.split();
|
||||
if(0)
|
||||
{
|
||||
Contact4* tmp = new Contact4[nContacts];
|
||||
data->m_contactBuffer->read( tmp, nContacts );
|
||||
DeviceUtils::waitForCompletion( data->m_contactBuffer->m_device );
|
||||
contactNative->write( tmp, nContacts );
|
||||
DeviceUtils::waitForCompletion( contactNative->m_device );
|
||||
delete [] tmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
BT_PROFILE("m_copyConstraintKernel");
|
||||
|
||||
Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
||||
|
||||
int4 cdata; cdata.x = nContacts;
|
||||
BufferInfo bInfo[] = { BufferInfo( data->m_contactBuffer ), BufferInfo( contactNative ) };
|
||||
// Launcher launcher( data->m_device, data->m_device->getKernel( PATH, "CopyConstraintKernel", "-I ..\\..\\ -Wf,--c++", 0 ) );
|
||||
Launcher launcher( data->m_device, data->m_copyConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
{
|
||||
BT_PROFILE("batchContacts");
|
||||
Solver<TYPE>::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, cfg.m_staticIdx );
|
||||
|
||||
}
|
||||
}
|
||||
{
|
||||
BT_PROFILE("waitForCompletion (batchContacts)");
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
sw.split();
|
||||
//================
|
||||
if(0)
|
||||
{
|
||||
// Solver<TYPE_HOST>::Data* solverHost = Solver<TYPE_HOST>::allocate( deviceHost, nContacts );
|
||||
// Solver<TYPE_HOST>::convertToConstraints( solverHost, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, cfg );
|
||||
// Solver<TYPE_HOST>::deallocate( solverHost );
|
||||
}
|
||||
else
|
||||
{
|
||||
BT_PROFILE("convertToConstraints");
|
||||
Solver<TYPE>::convertToConstraints( data, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, cfg );
|
||||
}
|
||||
{
|
||||
BT_PROFILE("convertToConstraints waitForCompletion");
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
sw.stop();
|
||||
|
||||
{
|
||||
BT_PROFILE("printf");
|
||||
|
||||
float t[5];
|
||||
sw.getMs( t, 3 );
|
||||
// printf("%3.2f, %3.2f, %3.2f, ", t[0], t[1], t[2]);
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("deallocate and unmap");
|
||||
|
||||
//DeviceUtils::deallocate( deviceHost );
|
||||
|
||||
BufferUtils::unmap<true>( contactNative, contactsIn, nContacts );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::solveContactConstraint( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
||||
SolverData constraint, void* additionalData, int n )
|
||||
{
|
||||
if(0)
|
||||
{
|
||||
DeviceUtils::Config dhCfg;
|
||||
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
||||
{
|
||||
Solver<TYPE_HOST>::Data* hostData = Solver<TYPE_HOST>::allocate( deviceHost, 0 );
|
||||
Solver<TYPE_HOST>::solveContactConstraint( hostData, bodyBuf, shapeBuf, constraint, additionalData, n );
|
||||
Solver<TYPE_HOST>::deallocate( hostData );
|
||||
}
|
||||
DeviceUtils::deallocate( deviceHost );
|
||||
return;
|
||||
}
|
||||
|
||||
ADLASSERT( data );
|
||||
|
||||
Buffer<Constraint4>* cBuffer =0;
|
||||
|
||||
Buffer<RigidBodyBase::Body>* gBodyNative=0;
|
||||
Buffer<RigidBodyBase::Inertia>* gShapeNative =0;
|
||||
Buffer<Constraint4>* gConstraintNative =0;
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("map");
|
||||
cBuffer = (Buffer<Constraint4>*)constraint;
|
||||
|
||||
gBodyNative= BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
|
||||
gShapeNative= BufferUtils::map<TYPE, true>( data->m_device, shapeBuf );
|
||||
gConstraintNative = BufferUtils::map<TYPE, true>( data->m_device, cBuffer );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
|
||||
Buffer<int4> constBuffer;
|
||||
int4 cdata = make_int4( n, 0, 0, 0 );
|
||||
{
|
||||
SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
const int nn = N_SPLIT*N_SPLIT;
|
||||
|
||||
cdata.x = 0;
|
||||
cdata.y = 250;
|
||||
|
||||
#if 0
|
||||
//check how the cells are filled
|
||||
unsigned int* hostCounts = new unsigned int[N_SPLIT*N_SPLIT];
|
||||
solveData->m_numConstraints->read(hostCounts,N_SPLIT*N_SPLIT);
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
for (int i=0;i<N_SPLIT*N_SPLIT;i++)
|
||||
{
|
||||
if (hostCounts[i])
|
||||
{
|
||||
printf("hostCounts[%d]=%d\n",i,hostCounts[i]);
|
||||
}
|
||||
}
|
||||
delete[] hostCounts;
|
||||
#endif
|
||||
|
||||
int numWorkItems = 64*nn/N_BATCHES;
|
||||
#ifdef DEBUG_ME
|
||||
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
||||
adl::Buffer<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
{
|
||||
|
||||
BT_PROFILE("m_batchSolveKernel iterations");
|
||||
for(int iter=0; iter<data->m_nIterations; iter++)
|
||||
{
|
||||
for(int ib=0; ib<N_BATCHES; ib++)
|
||||
{
|
||||
#ifdef DEBUG_ME
|
||||
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
||||
gpuDebugInfo.write(debugInfo,numWorkItems);
|
||||
#endif
|
||||
|
||||
|
||||
cdata.z = ib;
|
||||
cdata.w = N_SPLIT;
|
||||
|
||||
|
||||
|
||||
BufferInfo bInfo[] = {
|
||||
|
||||
BufferInfo( gBodyNative ),
|
||||
BufferInfo( gShapeNative ),
|
||||
BufferInfo( gConstraintNative ),
|
||||
BufferInfo( solveData->m_numConstraints ),
|
||||
BufferInfo( solveData->m_offsets )
|
||||
#ifdef DEBUG_ME
|
||||
, BufferInfo(&gpuDebugInfo)
|
||||
#endif
|
||||
};
|
||||
|
||||
Launcher launcher( data->m_device, data->m_batchSolveKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
|
||||
launcher.launch1D( numWorkItems, 64 );
|
||||
|
||||
#ifdef DEBUG_ME
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
gpuDebugInfo.read(debugInfo,numWorkItems);
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
for (int i=0;i<numWorkItems;i++)
|
||||
{
|
||||
if (debugInfo[i].m_valInt2>0)
|
||||
{
|
||||
printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2);
|
||||
}
|
||||
|
||||
if (debugInfo[i].m_valInt3>0)
|
||||
{
|
||||
printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3);
|
||||
}
|
||||
}
|
||||
#endif //DEBUG_ME
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
|
||||
}
|
||||
|
||||
cdata.x = 1;
|
||||
{
|
||||
BT_PROFILE("m_batchSolveKernel iterations2");
|
||||
for(int iter=0; iter<data->m_nIterations; iter++)
|
||||
{
|
||||
for(int ib=0; ib<N_BATCHES; ib++)
|
||||
{
|
||||
cdata.z = ib;
|
||||
cdata.w = N_SPLIT;
|
||||
|
||||
BufferInfo bInfo[] = {
|
||||
BufferInfo( gBodyNative ),
|
||||
BufferInfo( gShapeNative ),
|
||||
BufferInfo( gConstraintNative ),
|
||||
BufferInfo( solveData->m_numConstraints ),
|
||||
BufferInfo( solveData->m_offsets )
|
||||
#ifdef DEBUG_ME
|
||||
,BufferInfo(&gpuDebugInfo)
|
||||
#endif //DEBUG_ME
|
||||
};
|
||||
Launcher launcher( data->m_device, data->m_batchSolveKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( 64*nn/N_BATCHES, 64 );
|
||||
}
|
||||
}
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
}
|
||||
#ifdef DEBUG_ME
|
||||
delete[] debugInfo;
|
||||
#endif //DEBUG_ME
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("unmap");
|
||||
BufferUtils::unmap<true>( gBodyNative, bodyBuf );
|
||||
BufferUtils::unmap<false>( gShapeNative, shapeBuf );
|
||||
BufferUtils::unmap<true>( gConstraintNative, cBuffer );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
}
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::convertToConstraints( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
||||
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
|
||||
int nContacts, const ConstraintCfg& cfg )
|
||||
{
|
||||
ADLASSERT( data->m_device->m_type == TYPE_CL );
|
||||
|
||||
Buffer<RigidBodyBase::Body>* bodyNative =0;
|
||||
Buffer<RigidBodyBase::Inertia>* shapeNative =0;
|
||||
Buffer<Contact4>* contactNative =0;
|
||||
Buffer<Constraint4>* constraintNative =0;
|
||||
|
||||
{
|
||||
BT_PROFILE("map buffers");
|
||||
|
||||
bodyNative = BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
|
||||
shapeNative = BufferUtils::map<TYPE, true>( data->m_device, shapeBuf );
|
||||
contactNative= BufferUtils::map<TYPE, true>( data->m_device, contactsIn );
|
||||
constraintNative = BufferUtils::map<TYPE, false>( data->m_device, (Buffer<Constraint4>*)contactCOut );
|
||||
}
|
||||
struct CB
|
||||
{
|
||||
int m_nContacts;
|
||||
float m_dt;
|
||||
float m_positionDrift;
|
||||
float m_positionConstraintCoeff;
|
||||
};
|
||||
|
||||
{
|
||||
BT_PROFILE("m_contactToConstraintKernel");
|
||||
CB cdata;
|
||||
cdata.m_nContacts = nContacts;
|
||||
cdata.m_dt = cfg.m_dt;
|
||||
cdata.m_positionDrift = cfg.m_positionDrift;
|
||||
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
|
||||
|
||||
Buffer<CB> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
||||
BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( bodyNative ), BufferInfo( shapeNative ),
|
||||
BufferInfo( constraintNative )};
|
||||
Launcher launcher( data->m_device, data->m_contactToConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("unmap");
|
||||
BufferUtils::unmap<false>( bodyNative, bodyBuf );
|
||||
BufferUtils::unmap<false>( shapeNative, shapeBuf );
|
||||
BufferUtils::unmap<false>( contactNative, contactsIn );
|
||||
BufferUtils::unmap<true>( constraintNative, (Buffer<Constraint4>*)contactCOut );
|
||||
}
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::sortContacts( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
||||
Buffer<Contact4>* contactsIn, void* additionalData,
|
||||
int nContacts, const typename Solver<TYPE>::ConstraintCfg& cfg )
|
||||
{
|
||||
ADLASSERT( data->m_device->m_type == TYPE_CL );
|
||||
Buffer<RigidBodyBase::Body>* bodyNative
|
||||
= BufferUtils::map<TYPE_CL, true>( data->m_device, bodyBuf );
|
||||
Buffer<Contact4>* contactNative
|
||||
= BufferUtils::map<TYPE_CL, true>( data->m_device, contactsIn );
|
||||
|
||||
const int sortAlignment = 512; // todo. get this out of sort
|
||||
if( cfg.m_enableParallelSolve )
|
||||
{
|
||||
SolverDeviceInl::ParallelSolveData* nativeSolveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
||||
|
||||
int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment );
|
||||
|
||||
Buffer<u32>* countsNative = nativeSolveData->m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost );
|
||||
Buffer<u32>* offsetsNative = nativeSolveData->m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost );
|
||||
|
||||
{ // 2. set cell idx
|
||||
struct CB
|
||||
{
|
||||
int m_nContacts;
|
||||
int m_staticIdx;
|
||||
float m_scale;
|
||||
int m_nSplit;
|
||||
};
|
||||
|
||||
ADLASSERT( sortSize%64 == 0 );
|
||||
CB cdata;
|
||||
cdata.m_nContacts = nContacts;
|
||||
cdata.m_staticIdx = cfg.m_staticIdx;
|
||||
cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent);
|
||||
cdata.m_nSplit = N_SPLIT;
|
||||
|
||||
Buffer<CB> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
||||
BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( bodyNative ), BufferInfo( data->m_sortDataBuffer ) };
|
||||
Launcher launcher( data->m_device, data->m_setSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( sortSize, 64 );
|
||||
}
|
||||
|
||||
{ // 3. sort by cell idx
|
||||
int n = N_SPLIT*N_SPLIT;
|
||||
int sortBit = 32;
|
||||
//if( n <= 0xffff ) sortBit = 16;
|
||||
//if( n <= 0xff ) sortBit = 8;
|
||||
RadixSort32<TYPE>::execute( data->m_sort32, *data->m_sortDataBuffer,sortSize);
|
||||
}
|
||||
{ // 4. find entries
|
||||
BoundSearch<TYPE>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, N_SPLIT*N_SPLIT, BoundSearchBase::COUNT );
|
||||
|
||||
PrefixScan<TYPE>::execute( data->m_scan, *countsNative, *offsetsNative, N_SPLIT*N_SPLIT );
|
||||
}
|
||||
|
||||
{ // 5. sort constraints by cellIdx
|
||||
// todo. preallocate this
|
||||
// ADLASSERT( contactsIn->getType() == TYPE_HOST );
|
||||
// Buffer<Contact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn ); // copying contacts to this buffer
|
||||
|
||||
{
|
||||
Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
||||
|
||||
int4 cdata; cdata.x = nContacts;
|
||||
BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( data->m_contactBuffer ), BufferInfo( data->m_sortDataBuffer ) };
|
||||
Launcher launcher( data->m_device, data->m_reorderContactKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
}
|
||||
// BufferUtils::unmap<true>( out, contactsIn, nContacts );
|
||||
}
|
||||
}
|
||||
|
||||
BufferUtils::unmap<false>( bodyNative, bodyBuf );
|
||||
BufferUtils::unmap<false>( contactNative, contactsIn );
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void Solver<TYPE>::batchContacts( typename Solver<TYPE>::Data* data, Buffer<Contact4>* contacts, int nContacts, Buffer<u32>* n, Buffer<u32>* offsets, int staticIdx )
|
||||
{
|
||||
ADLASSERT( data->m_device->m_type == TYPE_CL );
|
||||
|
||||
if(0)
|
||||
{
|
||||
BT_PROFILE("CPU classTestKernel/Kernel (batch generation?)");
|
||||
|
||||
DeviceUtils::Config dhCfg;
|
||||
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
||||
{
|
||||
Solver<TYPE_HOST>::Data* hostData = Solver<TYPE_HOST>::allocate( deviceHost, 0 );
|
||||
Solver<TYPE_HOST>::batchContacts( hostData, contacts, nContacts, n, offsets, staticIdx );
|
||||
Solver<TYPE_HOST>::deallocate( hostData );
|
||||
}
|
||||
DeviceUtils::deallocate( deviceHost );
|
||||
return;
|
||||
}
|
||||
|
||||
Buffer<Contact4>* contactNative
|
||||
= BufferUtils::map<TYPE_CL, true>( data->m_device, contacts, nContacts );
|
||||
Buffer<u32>* nNative
|
||||
= BufferUtils::map<TYPE_CL, true>( data->m_device, n );
|
||||
Buffer<u32>* offsetsNative
|
||||
= BufferUtils::map<TYPE_CL, true>( data->m_device, offsets );
|
||||
|
||||
{
|
||||
BT_PROFILE("GPU classTestKernel/Kernel (batch generation?)");
|
||||
Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
||||
int4 cdata;
|
||||
cdata.x = nContacts;
|
||||
cdata.y = 0;
|
||||
cdata.z = staticIdx;
|
||||
|
||||
int numWorkItems = 64*N_SPLIT*N_SPLIT;
|
||||
#ifdef BATCH_DEBUG
|
||||
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
||||
adl::Buffer<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
||||
gpuDebugInfo.write(debugInfo,numWorkItems);
|
||||
#endif
|
||||
|
||||
|
||||
BufferInfo bInfo[] = {
|
||||
BufferInfo( contactNative ),
|
||||
BufferInfo( data->m_contactBuffer ),
|
||||
BufferInfo( nNative ),
|
||||
BufferInfo( offsetsNative )
|
||||
#ifdef BATCH_DEBUG
|
||||
, BufferInfo(&gpuDebugInfo)
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
||||
Launcher launcher( data->m_device, data->m_batchingKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( constBuffer, cdata );
|
||||
launcher.launch1D( numWorkItems, 64 );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
#ifdef BATCH_DEBUG
|
||||
aaaa
|
||||
Contact4* hostContacts = new Contact4[nContacts];
|
||||
data->m_contactBuffer->read(hostContacts,nContacts);
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
gpuDebugInfo.read(debugInfo,numWorkItems);
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
for (int i=0;i<numWorkItems;i++)
|
||||
{
|
||||
if (debugInfo[i].m_valInt1>0)
|
||||
{
|
||||
printf("catch\n");
|
||||
}
|
||||
if (debugInfo[i].m_valInt2>0)
|
||||
{
|
||||
printf("catch22\n");
|
||||
}
|
||||
|
||||
if (debugInfo[i].m_valInt3>0)
|
||||
{
|
||||
printf("catch666\n");
|
||||
}
|
||||
|
||||
if (debugInfo[i].m_valInt4>0)
|
||||
{
|
||||
printf("catch777\n");
|
||||
}
|
||||
}
|
||||
delete[] debugInfo;
|
||||
#endif //BATCH_DEBUG
|
||||
|
||||
}
|
||||
|
||||
if(0)
|
||||
{
|
||||
u32* nhost = new u32[N_SPLIT*N_SPLIT];
|
||||
|
||||
nNative->read( nhost, N_SPLIT*N_SPLIT );
|
||||
|
||||
Contact4* chost = new Contact4[nContacts];
|
||||
data->m_contactBuffer->read( chost, nContacts );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
printf(">>");
|
||||
int nonzero = 0;
|
||||
u32 maxn = 0;
|
||||
for(int i=0; i<N_SPLIT*N_SPLIT; i++)
|
||||
{
|
||||
printf("%d-", nhost[i]);
|
||||
nonzero += (nhost[i]==0)? 0:1;
|
||||
maxn = max2( nhost[i], maxn );
|
||||
}
|
||||
printf("\nnonzero:zero = %d:%d (%d)\n", nonzero, N_SPLIT*N_SPLIT-nonzero, maxn);
|
||||
printf("\n\n");
|
||||
|
||||
int prev = 0;
|
||||
int prevIdx = 0;
|
||||
int maxNBatches = 0;
|
||||
for(int i=0; i<nContacts; i++)
|
||||
{
|
||||
// printf("(%d, %d:%d),", chost[i].m_batchIdx, chost[i].m_bodyAPtr, chost[i].m_bodyBPtr);
|
||||
if( prev != 0 && chost[i].m_batchIdx == 0 )
|
||||
{
|
||||
maxNBatches = max2( maxNBatches, prev );
|
||||
printf("\n[%d]", prev);
|
||||
|
||||
//for(int j=prevIdx; j<i; j++)
|
||||
//{
|
||||
// printf("(%d:%d),", chost[j].m_bodyAPtr, chost[j].m_bodyBPtr);
|
||||
//}
|
||||
|
||||
//printf("\n");
|
||||
|
||||
prevIdx = i;
|
||||
}
|
||||
|
||||
printf("%d,", chost[i].m_batchIdx);
|
||||
|
||||
prev = chost[i].m_batchIdx;
|
||||
}
|
||||
printf("\n");
|
||||
printf("Max: %d\n", maxNBatches);
|
||||
|
||||
delete [] chost;
|
||||
delete [] nhost;
|
||||
}
|
||||
// copy buffer to buffer
|
||||
contactNative->write( *data->m_contactBuffer, nContacts );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
if(0)
|
||||
{
|
||||
DeviceUtils::Config dhCfg;
|
||||
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
||||
{
|
||||
HostBuffer<Contact4> host( deviceHost, nContacts );
|
||||
contactNative->read( host.m_ptr, nContacts );
|
||||
DeviceUtils::waitForCompletion( data->m_device );
|
||||
|
||||
for(int i=0; i<nContacts; i++)
|
||||
{
|
||||
ADLASSERT( host[i].m_bodyAPtr <= (u32)staticIdx );
|
||||
ADLASSERT( host[i].m_bodyBPtr <= (u32)staticIdx );
|
||||
}
|
||||
}
|
||||
DeviceUtils::deallocate( deviceHost );
|
||||
}
|
||||
|
||||
BufferUtils::unmap<true>( contactNative, contacts );
|
||||
BufferUtils::unmap<false>( nNative, n );
|
||||
BufferUtils::unmap<false>( offsetsNative, offsets );
|
||||
}
|
||||
|
||||
// Remove every helper macro defined at the top of this file so that none of
// them leaks into the translation unit that includes this .inl.
#undef PATH
#undef BATCHING_PATH
#undef KERNEL1
#undef KERNEL2

#undef KERNEL3
#undef KERNEL4
#undef KERNEL5
|
||||
Reference in New Issue
Block a user