763 lines
23 KiB
C++
763 lines
23 KiB
C++
/*
|
|
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
|
|
|
This software is provided 'as-is', without any express or implied warranty.
|
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
|
Permission is granted to anyone to use this software for any purpose,
|
|
including commercial applications, and to alter it and redistribute it freely,
|
|
subject to the following restrictions:
|
|
|
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
|
3. This notice may not be removed or altered from any source distribution.
|
|
*/
|
|
//Originally written by Takahiro Harada
|
|
|
|
|
|
#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\SolverKernels"
|
|
#define BATCHING_PATH "..\\..\\dynamics\\basic_demo\\Stubs\\batchingKernels"
|
|
|
|
#define KERNEL1 "SingleBatchSolveKernel"
|
|
#define KERNEL2 "BatchSolveKernel"
|
|
|
|
#define KERNEL3 "ContactToConstraintKernel"
|
|
#define KERNEL4 "SetSortDataKernel"
|
|
#define KERNEL5 "ReorderContactKernel"
|
|
#include "SolverKernels.h"
|
|
|
|
#include "batchingKernels.h"
|
|
|
|
|
|
struct SolverDebugInfo
|
|
{
|
|
int m_valInt0;
|
|
int m_valInt1;
|
|
int m_valInt2;
|
|
int m_valInt3;
|
|
|
|
int m_valInt4;
|
|
int m_valInt5;
|
|
int m_valInt6;
|
|
int m_valInt7;
|
|
|
|
int m_valInt8;
|
|
int m_valInt9;
|
|
int m_valInt10;
|
|
int m_valInt11;
|
|
|
|
int m_valInt12;
|
|
int m_valInt13;
|
|
int m_valInt14;
|
|
int m_valInt15;
|
|
|
|
|
|
float m_val0;
|
|
float m_val1;
|
|
float m_val2;
|
|
float m_val3;
|
|
};
|
|
|
|
|
|
|
|
|
|
class SolverDeviceInl
|
|
{
|
|
public:
|
|
struct ParallelSolveData
|
|
{
|
|
Buffer<u32>* m_numConstraints;
|
|
Buffer<u32>* m_offsets;
|
|
};
|
|
};
|
|
|
|
template<DeviceType TYPE>
|
|
typename Solver<TYPE>::Data* Solver<TYPE>::allocate( const Device* device, int pairCapacity )
|
|
{
|
|
const char* src[] =
|
|
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
|
|
{solverKernelsCL, 0};
|
|
#else
|
|
{0,0};
|
|
#endif
|
|
|
|
const char* src2[] =
|
|
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
|
|
{batchingKernelsCL, 0};
|
|
#else
|
|
{0,0};
|
|
#endif
|
|
|
|
|
|
|
|
|
|
Data* data = new Data;
|
|
data->m_device = device;
|
|
bool cacheBatchingKernel = true;
|
|
data->m_batchingKernel = device->getKernel( BATCHING_PATH, "CreateBatches", "-I ..\\..\\ ", src2[TYPE],cacheBatchingKernel);
|
|
//data->m_batchingKernel = device->getKernel( BATCHING_PATH, "CreateBatches", "-I ..\\..\\ ", 0,cacheBatchingKernel);
|
|
bool cacheSolverKernel = true;
|
|
|
|
data->m_batchSolveKernel = device->getKernel( PATH, KERNEL2, "-I ..\\..\\ ", src[TYPE],cacheSolverKernel );
|
|
data->m_contactToConstraintKernel = device->getKernel( PATH, KERNEL3,
|
|
"-I ..\\..\\ ", src[TYPE] );
|
|
data->m_setSortDataKernel = device->getKernel( PATH, KERNEL4,
|
|
"-I ..\\..\\ ", src[TYPE] );
|
|
data->m_reorderContactKernel = device->getKernel( PATH, KERNEL5,
|
|
"-I ..\\..\\ ", src[TYPE] );
|
|
|
|
data->m_copyConstraintKernel = device->getKernel( PATH, "CopyConstraintKernel",
|
|
"-I ..\\..\\ ", src[TYPE] );
|
|
|
|
data->m_parallelSolveData = new SolverDeviceInl::ParallelSolveData;
|
|
{
|
|
SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
|
solveData->m_numConstraints = new Buffer<u32>( device, N_SPLIT*N_SPLIT );
|
|
solveData->m_offsets = new Buffer<u32>( device, N_SPLIT*N_SPLIT );
|
|
}
|
|
const int sortSize = NEXTMULTIPLEOF( pairCapacity, 512 );
|
|
|
|
|
|
//data->m_sort = RadixSort<TYPE>::allocate( data->m_device, sortSize );//todo. remove hardcode this
|
|
data->m_sort32 = RadixSort32<TYPE>::allocate( data->m_device, sortSize );//todo. remove hardcode this
|
|
|
|
data->m_search = BoundSearch<TYPE>::allocate( data->m_device, N_SPLIT*N_SPLIT );
|
|
data->m_scan = PrefixScan<TYPE>::allocate( data->m_device, N_SPLIT*N_SPLIT );
|
|
|
|
data->m_sortDataBuffer = new Buffer<SortData>( data->m_device, sortSize );
|
|
|
|
if( pairCapacity < DYNAMIC_CONTACT_ALLOCATION_THRESHOLD )
|
|
data->m_contactBuffer = new Buffer<Contact4>( data->m_device, pairCapacity );
|
|
else
|
|
data->m_contactBuffer = 0;
|
|
|
|
return data;
|
|
}
|
|
|
|
template<DeviceType TYPE>
|
|
void Solver<TYPE>::deallocate( Data* data )
|
|
{
|
|
{
|
|
SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
|
delete solveData->m_numConstraints;
|
|
delete solveData->m_offsets;
|
|
delete solveData;
|
|
}
|
|
|
|
// RadixSort<TYPE>::deallocate( data->m_sort );
|
|
RadixSort32<TYPE>::deallocate(data->m_sort32);
|
|
BoundSearch<TYPE>::deallocate( data->m_search );
|
|
PrefixScan<TYPE>::deallocate( data->m_scan );
|
|
|
|
delete data->m_sortDataBuffer;
|
|
if( data->m_contactBuffer ) delete data->m_contactBuffer;
|
|
|
|
delete data;
|
|
}
|
|
|
|
template<DeviceType TYPE>
|
|
void Solver<TYPE>::reorderConvertToConstraints( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
|
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
|
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
|
|
int nContacts, const typename Solver<TYPE>::ConstraintCfg& cfg )
|
|
{
|
|
if( data->m_contactBuffer )
|
|
{
|
|
if( data->m_contactBuffer->getSize() < nContacts )
|
|
{
|
|
BT_PROFILE("delete data->m_contactBuffer;");
|
|
delete data->m_contactBuffer;
|
|
data->m_contactBuffer = 0;
|
|
}
|
|
}
|
|
if( data->m_contactBuffer == 0 )
|
|
{
|
|
BT_PROFILE("new data->m_contactBuffer;");
|
|
|
|
data->m_contactBuffer = new Buffer<Contact4>( data->m_device, nContacts );
|
|
}
|
|
Stopwatch sw;
|
|
|
|
Buffer<Contact4>* contactNative = BufferUtils::map<TYPE_CL, true>( data->m_device, contactsIn, nContacts );
|
|
|
|
//DeviceUtils::Config dhCfg;
|
|
//Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
|
if( cfg.m_enableParallelSolve )
|
|
{
|
|
SolverDeviceInl::ParallelSolveData* nativeSolveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
|
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
sw.start();
|
|
// contactsIn -> data->m_contactBuffer
|
|
{
|
|
BT_PROFILE("sortContacts");
|
|
Solver<TYPE>::sortContacts( data, bodyBuf, contactNative, additionalData, nContacts, cfg );
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
}
|
|
sw.split();
|
|
if(0)
|
|
{
|
|
Contact4* tmp = new Contact4[nContacts];
|
|
data->m_contactBuffer->read( tmp, nContacts );
|
|
DeviceUtils::waitForCompletion( data->m_contactBuffer->m_device );
|
|
contactNative->write( tmp, nContacts );
|
|
DeviceUtils::waitForCompletion( contactNative->m_device );
|
|
delete [] tmp;
|
|
}
|
|
else
|
|
{
|
|
BT_PROFILE("m_copyConstraintKernel");
|
|
|
|
Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
|
|
|
int4 cdata; cdata.x = nContacts;
|
|
BufferInfo bInfo[] = { BufferInfo( data->m_contactBuffer ), BufferInfo( contactNative ) };
|
|
// Launcher launcher( data->m_device, data->m_device->getKernel( PATH, "CopyConstraintKernel", "-I ..\\..\\ -Wf,--c++", 0 ) );
|
|
Launcher launcher( data->m_device, data->m_copyConstraintKernel );
|
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
|
launcher.setConst( constBuffer, cdata );
|
|
launcher.launch1D( nContacts, 64 );
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
}
|
|
{
|
|
BT_PROFILE("batchContacts");
|
|
Solver<TYPE>::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, cfg.m_staticIdx );
|
|
|
|
}
|
|
}
|
|
{
|
|
BT_PROFILE("waitForCompletion (batchContacts)");
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
}
|
|
sw.split();
|
|
//================
|
|
if(0)
|
|
{
|
|
// Solver<TYPE_HOST>::Data* solverHost = Solver<TYPE_HOST>::allocate( deviceHost, nContacts );
|
|
// Solver<TYPE_HOST>::convertToConstraints( solverHost, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, cfg );
|
|
// Solver<TYPE_HOST>::deallocate( solverHost );
|
|
}
|
|
else
|
|
{
|
|
BT_PROFILE("convertToConstraints");
|
|
Solver<TYPE>::convertToConstraints( data, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, cfg );
|
|
}
|
|
{
|
|
BT_PROFILE("convertToConstraints waitForCompletion");
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
}
|
|
sw.stop();
|
|
|
|
{
|
|
BT_PROFILE("printf");
|
|
|
|
float t[5];
|
|
sw.getMs( t, 3 );
|
|
// printf("%3.2f, %3.2f, %3.2f, ", t[0], t[1], t[2]);
|
|
}
|
|
|
|
{
|
|
BT_PROFILE("deallocate and unmap");
|
|
|
|
//DeviceUtils::deallocate( deviceHost );
|
|
|
|
BufferUtils::unmap<true>( contactNative, contactsIn, nContacts );
|
|
}
|
|
}
|
|
|
|
|
|
template<DeviceType TYPE>
|
|
void Solver<TYPE>::solveContactConstraint( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
|
SolverData constraint, void* additionalData, int n )
|
|
{
|
|
if(0)
|
|
{
|
|
DeviceUtils::Config dhCfg;
|
|
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
|
{
|
|
Solver<TYPE_HOST>::Data* hostData = Solver<TYPE_HOST>::allocate( deviceHost, 0 );
|
|
Solver<TYPE_HOST>::solveContactConstraint( hostData, bodyBuf, shapeBuf, constraint, additionalData, n );
|
|
Solver<TYPE_HOST>::deallocate( hostData );
|
|
}
|
|
DeviceUtils::deallocate( deviceHost );
|
|
return;
|
|
}
|
|
|
|
ADLASSERT( data );
|
|
|
|
Buffer<Constraint4>* cBuffer =0;
|
|
|
|
Buffer<RigidBodyBase::Body>* gBodyNative=0;
|
|
Buffer<RigidBodyBase::Inertia>* gShapeNative =0;
|
|
Buffer<Constraint4>* gConstraintNative =0;
|
|
|
|
|
|
{
|
|
BT_PROFILE("map");
|
|
cBuffer = (Buffer<Constraint4>*)constraint;
|
|
|
|
gBodyNative= BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
|
|
gShapeNative= BufferUtils::map<TYPE, true>( data->m_device, shapeBuf );
|
|
gConstraintNative = BufferUtils::map<TYPE, true>( data->m_device, cBuffer );
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
}
|
|
|
|
Buffer<int4> constBuffer;
|
|
int4 cdata = make_int4( n, 0, 0, 0 );
|
|
{
|
|
SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
|
const int nn = N_SPLIT*N_SPLIT;
|
|
|
|
cdata.x = 0;
|
|
cdata.y = 250;
|
|
|
|
#if 0
|
|
//check how the cells are filled
|
|
unsigned int* hostCounts = new unsigned int[N_SPLIT*N_SPLIT];
|
|
solveData->m_numConstraints->read(hostCounts,N_SPLIT*N_SPLIT);
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
for (int i=0;i<N_SPLIT*N_SPLIT;i++)
|
|
{
|
|
if (hostCounts[i])
|
|
{
|
|
printf("hostCounts[%d]=%d\n",i,hostCounts[i]);
|
|
}
|
|
}
|
|
delete[] hostCounts;
|
|
#endif
|
|
|
|
int numWorkItems = 64*nn/N_BATCHES;
|
|
#ifdef DEBUG_ME
|
|
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
|
adl::Buffer<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
|
#endif
|
|
|
|
|
|
|
|
{
|
|
|
|
BT_PROFILE("m_batchSolveKernel iterations");
|
|
for(int iter=0; iter<data->m_nIterations; iter++)
|
|
{
|
|
for(int ib=0; ib<N_BATCHES; ib++)
|
|
{
|
|
#ifdef DEBUG_ME
|
|
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
|
gpuDebugInfo.write(debugInfo,numWorkItems);
|
|
#endif
|
|
|
|
|
|
cdata.z = ib;
|
|
cdata.w = N_SPLIT;
|
|
|
|
|
|
|
|
BufferInfo bInfo[] = {
|
|
|
|
BufferInfo( gBodyNative ),
|
|
BufferInfo( gShapeNative ),
|
|
BufferInfo( gConstraintNative ),
|
|
BufferInfo( solveData->m_numConstraints ),
|
|
BufferInfo( solveData->m_offsets )
|
|
#ifdef DEBUG_ME
|
|
, BufferInfo(&gpuDebugInfo)
|
|
#endif
|
|
};
|
|
|
|
Launcher launcher( data->m_device, data->m_batchSolveKernel );
|
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
|
launcher.setConst( constBuffer, cdata );
|
|
|
|
launcher.launch1D( numWorkItems, 64 );
|
|
|
|
#ifdef DEBUG_ME
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
gpuDebugInfo.read(debugInfo,numWorkItems);
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
for (int i=0;i<numWorkItems;i++)
|
|
{
|
|
if (debugInfo[i].m_valInt2>0)
|
|
{
|
|
printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2);
|
|
}
|
|
|
|
if (debugInfo[i].m_valInt3>0)
|
|
{
|
|
printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3);
|
|
}
|
|
}
|
|
#endif //DEBUG_ME
|
|
|
|
|
|
}
|
|
}
|
|
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
|
|
|
|
}
|
|
|
|
cdata.x = 1;
|
|
{
|
|
BT_PROFILE("m_batchSolveKernel iterations2");
|
|
for(int iter=0; iter<data->m_nIterations; iter++)
|
|
{
|
|
for(int ib=0; ib<N_BATCHES; ib++)
|
|
{
|
|
cdata.z = ib;
|
|
cdata.w = N_SPLIT;
|
|
|
|
BufferInfo bInfo[] = {
|
|
BufferInfo( gBodyNative ),
|
|
BufferInfo( gShapeNative ),
|
|
BufferInfo( gConstraintNative ),
|
|
BufferInfo( solveData->m_numConstraints ),
|
|
BufferInfo( solveData->m_offsets )
|
|
#ifdef DEBUG_ME
|
|
,BufferInfo(&gpuDebugInfo)
|
|
#endif //DEBUG_ME
|
|
};
|
|
Launcher launcher( data->m_device, data->m_batchSolveKernel );
|
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
|
launcher.setConst( constBuffer, cdata );
|
|
launcher.launch1D( 64*nn/N_BATCHES, 64 );
|
|
}
|
|
}
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
|
|
}
|
|
#ifdef DEBUG_ME
|
|
delete[] debugInfo;
|
|
#endif //DEBUG_ME
|
|
}
|
|
|
|
{
|
|
BT_PROFILE("unmap");
|
|
BufferUtils::unmap<true>( gBodyNative, bodyBuf );
|
|
BufferUtils::unmap<false>( gShapeNative, shapeBuf );
|
|
BufferUtils::unmap<true>( gConstraintNative, cBuffer );
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
}
|
|
}
|
|
|
|
template<DeviceType TYPE>
|
|
void Solver<TYPE>::convertToConstraints( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
|
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
|
|
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
|
|
int nContacts, const ConstraintCfg& cfg )
|
|
{
|
|
ADLASSERT( data->m_device->m_type == TYPE_CL );
|
|
|
|
Buffer<RigidBodyBase::Body>* bodyNative =0;
|
|
Buffer<RigidBodyBase::Inertia>* shapeNative =0;
|
|
Buffer<Contact4>* contactNative =0;
|
|
Buffer<Constraint4>* constraintNative =0;
|
|
|
|
{
|
|
BT_PROFILE("map buffers");
|
|
|
|
bodyNative = BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
|
|
shapeNative = BufferUtils::map<TYPE, true>( data->m_device, shapeBuf );
|
|
contactNative= BufferUtils::map<TYPE, true>( data->m_device, contactsIn );
|
|
constraintNative = BufferUtils::map<TYPE, false>( data->m_device, (Buffer<Constraint4>*)contactCOut );
|
|
}
|
|
struct CB
|
|
{
|
|
int m_nContacts;
|
|
float m_dt;
|
|
float m_positionDrift;
|
|
float m_positionConstraintCoeff;
|
|
};
|
|
|
|
{
|
|
BT_PROFILE("m_contactToConstraintKernel");
|
|
CB cdata;
|
|
cdata.m_nContacts = nContacts;
|
|
cdata.m_dt = cfg.m_dt;
|
|
cdata.m_positionDrift = cfg.m_positionDrift;
|
|
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
|
|
|
|
Buffer<CB> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
|
BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( bodyNative ), BufferInfo( shapeNative ),
|
|
BufferInfo( constraintNative )};
|
|
Launcher launcher( data->m_device, data->m_contactToConstraintKernel );
|
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
|
launcher.setConst( constBuffer, cdata );
|
|
launcher.launch1D( nContacts, 64 );
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
|
|
}
|
|
|
|
{
|
|
BT_PROFILE("unmap");
|
|
BufferUtils::unmap<false>( bodyNative, bodyBuf );
|
|
BufferUtils::unmap<false>( shapeNative, shapeBuf );
|
|
BufferUtils::unmap<false>( contactNative, contactsIn );
|
|
BufferUtils::unmap<true>( constraintNative, (Buffer<Constraint4>*)contactCOut );
|
|
}
|
|
}
|
|
|
|
template<DeviceType TYPE>
|
|
void Solver<TYPE>::sortContacts( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
|
|
Buffer<Contact4>* contactsIn, void* additionalData,
|
|
int nContacts, const typename Solver<TYPE>::ConstraintCfg& cfg )
|
|
{
|
|
ADLASSERT( data->m_device->m_type == TYPE_CL );
|
|
Buffer<RigidBodyBase::Body>* bodyNative
|
|
= BufferUtils::map<TYPE_CL, true>( data->m_device, bodyBuf );
|
|
Buffer<Contact4>* contactNative
|
|
= BufferUtils::map<TYPE_CL, true>( data->m_device, contactsIn );
|
|
|
|
const int sortAlignment = 512; // todo. get this out of sort
|
|
if( cfg.m_enableParallelSolve )
|
|
{
|
|
SolverDeviceInl::ParallelSolveData* nativeSolveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
|
|
|
|
int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment );
|
|
|
|
Buffer<u32>* countsNative = nativeSolveData->m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost );
|
|
Buffer<u32>* offsetsNative = nativeSolveData->m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost );
|
|
|
|
{ // 2. set cell idx
|
|
struct CB
|
|
{
|
|
int m_nContacts;
|
|
int m_staticIdx;
|
|
float m_scale;
|
|
int m_nSplit;
|
|
};
|
|
|
|
ADLASSERT( sortSize%64 == 0 );
|
|
CB cdata;
|
|
cdata.m_nContacts = nContacts;
|
|
cdata.m_staticIdx = cfg.m_staticIdx;
|
|
cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent);
|
|
cdata.m_nSplit = N_SPLIT;
|
|
|
|
Buffer<CB> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
|
BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( bodyNative ), BufferInfo( data->m_sortDataBuffer ) };
|
|
Launcher launcher( data->m_device, data->m_setSortDataKernel );
|
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
|
launcher.setConst( constBuffer, cdata );
|
|
launcher.launch1D( sortSize, 64 );
|
|
}
|
|
|
|
{ // 3. sort by cell idx
|
|
int n = N_SPLIT*N_SPLIT;
|
|
int sortBit = 32;
|
|
//if( n <= 0xffff ) sortBit = 16;
|
|
//if( n <= 0xff ) sortBit = 8;
|
|
RadixSort32<TYPE>::execute( data->m_sort32, *data->m_sortDataBuffer,sortSize);
|
|
}
|
|
{ // 4. find entries
|
|
BoundSearch<TYPE>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, N_SPLIT*N_SPLIT, BoundSearchBase::COUNT );
|
|
|
|
PrefixScan<TYPE>::execute( data->m_scan, *countsNative, *offsetsNative, N_SPLIT*N_SPLIT );
|
|
}
|
|
|
|
{ // 5. sort constraints by cellIdx
|
|
// todo. preallocate this
|
|
// ADLASSERT( contactsIn->getType() == TYPE_HOST );
|
|
// Buffer<Contact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn ); // copying contacts to this buffer
|
|
|
|
{
|
|
Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
|
|
|
int4 cdata; cdata.x = nContacts;
|
|
BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( data->m_contactBuffer ), BufferInfo( data->m_sortDataBuffer ) };
|
|
Launcher launcher( data->m_device, data->m_reorderContactKernel );
|
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
|
launcher.setConst( constBuffer, cdata );
|
|
launcher.launch1D( nContacts, 64 );
|
|
}
|
|
// BufferUtils::unmap<true>( out, contactsIn, nContacts );
|
|
}
|
|
}
|
|
|
|
BufferUtils::unmap<false>( bodyNative, bodyBuf );
|
|
BufferUtils::unmap<false>( contactNative, contactsIn );
|
|
}
|
|
|
|
template<DeviceType TYPE>
|
|
void Solver<TYPE>::batchContacts( typename Solver<TYPE>::Data* data, Buffer<Contact4>* contacts, int nContacts, Buffer<u32>* n, Buffer<u32>* offsets, int staticIdx )
|
|
{
|
|
ADLASSERT( data->m_device->m_type == TYPE_CL );
|
|
|
|
if(0)
|
|
{
|
|
BT_PROFILE("CPU classTestKernel/Kernel (batch generation?)");
|
|
|
|
DeviceUtils::Config dhCfg;
|
|
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
|
{
|
|
Solver<TYPE_HOST>::Data* hostData = Solver<TYPE_HOST>::allocate( deviceHost, 0 );
|
|
Solver<TYPE_HOST>::batchContacts( hostData, contacts, nContacts, n, offsets, staticIdx );
|
|
Solver<TYPE_HOST>::deallocate( hostData );
|
|
}
|
|
DeviceUtils::deallocate( deviceHost );
|
|
return;
|
|
}
|
|
|
|
Buffer<Contact4>* contactNative
|
|
= BufferUtils::map<TYPE_CL, true>( data->m_device, contacts, nContacts );
|
|
Buffer<u32>* nNative
|
|
= BufferUtils::map<TYPE_CL, true>( data->m_device, n );
|
|
Buffer<u32>* offsetsNative
|
|
= BufferUtils::map<TYPE_CL, true>( data->m_device, offsets );
|
|
|
|
{
|
|
BT_PROFILE("GPU classTestKernel/Kernel (batch generation?)");
|
|
Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
|
|
int4 cdata;
|
|
cdata.x = nContacts;
|
|
cdata.y = 0;
|
|
cdata.z = staticIdx;
|
|
|
|
int numWorkItems = 64*N_SPLIT*N_SPLIT;
|
|
#ifdef BATCH_DEBUG
|
|
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
|
adl::Buffer<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
|
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
|
gpuDebugInfo.write(debugInfo,numWorkItems);
|
|
#endif
|
|
|
|
|
|
BufferInfo bInfo[] = {
|
|
BufferInfo( contactNative ),
|
|
BufferInfo( data->m_contactBuffer ),
|
|
BufferInfo( nNative ),
|
|
BufferInfo( offsetsNative )
|
|
#ifdef BATCH_DEBUG
|
|
, BufferInfo(&gpuDebugInfo)
|
|
#endif
|
|
};
|
|
|
|
|
|
|
|
Launcher launcher( data->m_device, data->m_batchingKernel);
|
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
|
launcher.setConst( constBuffer, cdata );
|
|
launcher.launch1D( numWorkItems, 64 );
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
|
|
#ifdef BATCH_DEBUG
|
|
aaaa
|
|
Contact4* hostContacts = new Contact4[nContacts];
|
|
data->m_contactBuffer->read(hostContacts,nContacts);
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
|
|
gpuDebugInfo.read(debugInfo,numWorkItems);
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
|
|
for (int i=0;i<numWorkItems;i++)
|
|
{
|
|
if (debugInfo[i].m_valInt1>0)
|
|
{
|
|
printf("catch\n");
|
|
}
|
|
if (debugInfo[i].m_valInt2>0)
|
|
{
|
|
printf("catch22\n");
|
|
}
|
|
|
|
if (debugInfo[i].m_valInt3>0)
|
|
{
|
|
printf("catch666\n");
|
|
}
|
|
|
|
if (debugInfo[i].m_valInt4>0)
|
|
{
|
|
printf("catch777\n");
|
|
}
|
|
}
|
|
delete[] debugInfo;
|
|
#endif //BATCH_DEBUG
|
|
|
|
}
|
|
|
|
if(0)
|
|
{
|
|
u32* nhost = new u32[N_SPLIT*N_SPLIT];
|
|
|
|
nNative->read( nhost, N_SPLIT*N_SPLIT );
|
|
|
|
Contact4* chost = new Contact4[nContacts];
|
|
data->m_contactBuffer->read( chost, nContacts );
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
printf(">>");
|
|
int nonzero = 0;
|
|
u32 maxn = 0;
|
|
for(int i=0; i<N_SPLIT*N_SPLIT; i++)
|
|
{
|
|
printf("%d-", nhost[i]);
|
|
nonzero += (nhost[i]==0)? 0:1;
|
|
maxn = max2( nhost[i], maxn );
|
|
}
|
|
printf("\nnonzero:zero = %d:%d (%d)\n", nonzero, N_SPLIT*N_SPLIT-nonzero, maxn);
|
|
printf("\n\n");
|
|
|
|
int prev = 0;
|
|
int prevIdx = 0;
|
|
int maxNBatches = 0;
|
|
for(int i=0; i<nContacts; i++)
|
|
{
|
|
// printf("(%d, %d:%d),", chost[i].m_batchIdx, chost[i].m_bodyAPtr, chost[i].m_bodyBPtr);
|
|
if( prev != 0 && chost[i].m_batchIdx == 0 )
|
|
{
|
|
maxNBatches = max2( maxNBatches, prev );
|
|
printf("\n[%d]", prev);
|
|
|
|
//for(int j=prevIdx; j<i; j++)
|
|
//{
|
|
// printf("(%d:%d),", chost[j].m_bodyAPtr, chost[j].m_bodyBPtr);
|
|
//}
|
|
|
|
//printf("\n");
|
|
|
|
prevIdx = i;
|
|
}
|
|
|
|
printf("%d,", chost[i].m_batchIdx);
|
|
|
|
prev = chost[i].m_batchIdx;
|
|
}
|
|
printf("\n");
|
|
printf("Max: %d\n", maxNBatches);
|
|
|
|
delete [] chost;
|
|
delete [] nhost;
|
|
}
|
|
// copy buffer to buffer
|
|
contactNative->write( *data->m_contactBuffer, nContacts );
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
|
|
if(0)
|
|
{
|
|
DeviceUtils::Config dhCfg;
|
|
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
|
|
{
|
|
HostBuffer<Contact4> host( deviceHost, nContacts );
|
|
contactNative->read( host.m_ptr, nContacts );
|
|
DeviceUtils::waitForCompletion( data->m_device );
|
|
|
|
for(int i=0; i<nContacts; i++)
|
|
{
|
|
ADLASSERT( host[i].m_bodyAPtr <= (u32)staticIdx );
|
|
ADLASSERT( host[i].m_bodyBPtr <= (u32)staticIdx );
|
|
}
|
|
}
|
|
DeviceUtils::deallocate( deviceHost );
|
|
}
|
|
|
|
BufferUtils::unmap<true>( contactNative, contacts );
|
|
BufferUtils::unmap<false>( nNative, n );
|
|
BufferUtils::unmap<false>( offsetsNative, offsets );
|
|
}
|
|
|
|
#undef PATH
|
|
#undef KERNEL1
|
|
#undef KERNEL2
|
|
|
|
#undef KERNEL3
|
|
#undef KERNEL4
|
|
#undef KERNEL5
|