allow PairBenchmark to select broadphase type, with cpu brute-force, gpu brute-force etc
fix issue in PairBench, related to index offset allow to add a large AABB in PairBench, to see the effect on the broadphase pair search performance
This commit is contained in:
@@ -21,6 +21,9 @@
|
||||
|
||||
#include "pairsKernel.h"
|
||||
|
||||
//we use an offset, just for testing to make sure there is no assumption in the broadphase that 'index' starts at 0
|
||||
#define TEST_INDEX_OFFSET 1024
|
||||
|
||||
#ifdef B3_USE_MIDI
|
||||
#include "../../../btgui/MidiTest/RtMidi.h"
|
||||
bool chooseMidiPort( RtMidiIn *rtmidi )
|
||||
@@ -79,10 +82,16 @@ int largeCount = 0;
|
||||
|
||||
float timeStepPos = 0.000166666;
|
||||
float mAmplitude = 251.f;
|
||||
int dimensions[3]={10,10,10};
|
||||
int dimensions[3]={10,10,10};//initialized with x_dim/y_dim/z_dim
|
||||
const char* axisNames[3] = {"# x-axis","# y-axis","# z-axis"};
|
||||
extern bool gReset;
|
||||
|
||||
static int curUseLargeAabbOption=0;
|
||||
const char* useLargeAabbOptions[] =
|
||||
{
|
||||
"NoLargeAabb",
|
||||
"UseLargeAabb",
|
||||
};
|
||||
|
||||
struct BroadphaseEntry
|
||||
{
|
||||
@@ -91,7 +100,6 @@ struct BroadphaseEntry
|
||||
};
|
||||
|
||||
|
||||
|
||||
static PairBench* sPairDemo = 0;
|
||||
|
||||
#define BP_COMBO_INDEX 123
|
||||
@@ -99,9 +107,13 @@ static PairBench* sPairDemo = 0;
|
||||
static int curSelectedBroadphase = 0;
|
||||
static BroadphaseEntry allBroadphases[]=
|
||||
{
|
||||
{"Gpu 1-Sap",b3GpuSapBroadphase::CreateFunc},
|
||||
{"Gpu Grid",b3GpuGridBroadphase::CreateFunc},
|
||||
|
||||
{"CPU Brute Force",b3GpuSapBroadphase::CreateFuncBruteForceCpu},
|
||||
{"GPU Brute Force",b3GpuSapBroadphase::CreateFuncBruteForceGpu},
|
||||
{"GPU 1-SAP Original",b3GpuSapBroadphase::CreateFuncOriginal},
|
||||
{"GPU 1-SAP Barrier",b3GpuSapBroadphase::CreateFuncBarrier},
|
||||
{"GPU 1-SAP LDS",b3GpuSapBroadphase::CreateFuncLocalMemory},
|
||||
{"GPU 1-SAP LDS Batch",b3GpuSapBroadphase::CreateFuncLocalMemoryBatchWrite},
|
||||
};
|
||||
|
||||
|
||||
@@ -222,6 +234,33 @@ struct PairComboBoxHander :public Gwen::Event::Handler
|
||||
|
||||
}
|
||||
|
||||
void onSelectUseLargeAabb( Gwen::Controls::Base* pControl )
|
||||
{
|
||||
if (m_active)
|
||||
{
|
||||
Gwen::Controls::ComboBox* but = (Gwen::Controls::ComboBox*) pControl;
|
||||
|
||||
Gwen::String str = Gwen::Utility::UnicodeToString( but->GetSelectedItem()->GetText());
|
||||
|
||||
int numItems = sizeof(useLargeAabbOptions)/sizeof(const char*);
|
||||
|
||||
//find selected item
|
||||
for (int i=0;i<numItems;i++)
|
||||
{
|
||||
if (strcmp(str.c_str(),useLargeAabbOptions[i])==0)
|
||||
{
|
||||
curUseLargeAabbOption=i;
|
||||
sPairDemo->deleteBroadphase();
|
||||
sPairDemo->createBroadphase(dimensions[0],dimensions[1],dimensions[2]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -246,8 +285,8 @@ struct MySliderEventHandler : public Gwen::Event::Handler
|
||||
{
|
||||
Gwen::Controls::Slider* pSlider = (Gwen::Controls::Slider*)pControl;
|
||||
//printf("value = %f\n", pSlider->GetValue());//UnitPrint( Utility::Format( L"Slider Value: %.2f", pSlider->GetValue() ) );
|
||||
|
||||
T v = T(pSlider->GetValue());
|
||||
float bla = pSlider->GetValue();
|
||||
T v = T(bla);
|
||||
SetValue(v);
|
||||
|
||||
}
|
||||
@@ -296,6 +335,10 @@ void PairMidiCallback( double deltatime, std::vector< unsigned char > *message,
|
||||
|
||||
void PairBench::initPhysics(const ConstructionInfo& ci)
|
||||
{
|
||||
dimensions[0] = ci.arraySizeX;
|
||||
dimensions[1] = ci.arraySizeY;
|
||||
dimensions[2] = ci.arraySizeZ;
|
||||
|
||||
#ifdef B3_USE_MIDI
|
||||
m_data->m_midiIn = new RtMidiIn();
|
||||
if (!chooseMidiPort(m_data->m_midiIn))
|
||||
@@ -327,7 +370,7 @@ void PairBench::initPhysics(const ConstructionInfo& ci)
|
||||
{
|
||||
|
||||
|
||||
int startItem = 0;
|
||||
int startItem = curSelectedBroadphase;
|
||||
int numBroadphases = sizeof(allBroadphases)/sizeof(BroadphaseEntry);
|
||||
|
||||
Gwen::Controls::ComboBox* combobox = new Gwen::Controls::ComboBox(data->m_demoPage->GetPage());
|
||||
@@ -353,6 +396,33 @@ void PairBench::initPhysics(const ConstructionInfo& ci)
|
||||
data->m_curYposition+=22;
|
||||
}
|
||||
|
||||
{
|
||||
int startItem = curUseLargeAabbOption;
|
||||
int numUseLargeAabb = sizeof(useLargeAabbOptions)/sizeof(const char*);
|
||||
|
||||
Gwen::Controls::ComboBox* combobox = new Gwen::Controls::ComboBox(data->m_demoPage->GetPage());
|
||||
PairComboBoxHander* handler = new PairComboBoxHander(555);
|
||||
m_data->m_myControls.push_back(combobox);
|
||||
|
||||
|
||||
combobox->onSelection.Add(handler,&PairComboBoxHander::onSelectUseLargeAabb);
|
||||
int ypos = data->m_curYposition;
|
||||
combobox->SetPos(10, ypos );
|
||||
combobox->SetWidth( 100 );
|
||||
|
||||
for (int i=0;i<numUseLargeAabb;i++)
|
||||
{
|
||||
Gwen::Controls::MenuItem* item = combobox->AddItem(Gwen::Utility::StringToUnicode(useLargeAabbOptions[i]));
|
||||
if (i==startItem)
|
||||
combobox->OnItemSelected(item);
|
||||
}
|
||||
|
||||
handler->m_active = true;
|
||||
|
||||
data->m_curYposition+=22;
|
||||
}
|
||||
data->m_curYposition+=22;
|
||||
|
||||
if (1)
|
||||
for (int i=0;i<3;i++)
|
||||
{
|
||||
@@ -434,7 +504,7 @@ void PairBench::initPhysics(const ConstructionInfo& ci)
|
||||
int errNum=0;
|
||||
m_data->m_moveObjectsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"moveObjectsKernel",&errNum,pairBenchProg);
|
||||
m_data->m_sineWaveKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"sineWaveKernel",&errNum,pairBenchProg);
|
||||
m_data->m_colorPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"colorPairsKernel",&errNum,pairBenchProg);
|
||||
m_data->m_colorPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"colorPairsKernel2",&errNum,pairBenchProg);
|
||||
m_data->m_updateAabbSimple = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"updateAabbSimple",&errNum,pairBenchProg);
|
||||
|
||||
}
|
||||
@@ -469,7 +539,7 @@ void PairBench::createBroadphase(int arraySizeX, int arraySizeY, int arraySizeZ)
|
||||
int shapeId = m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices);
|
||||
int group=1;
|
||||
int mask=1;
|
||||
int index=10;
|
||||
int index=TEST_INDEX_OFFSET;
|
||||
|
||||
|
||||
if (gPairBenchFileName)
|
||||
@@ -598,14 +668,44 @@ void PairBench::createBroadphase(int arraySizeX, int arraySizeY, int arraySizeZ)
|
||||
|
||||
b3Vector4 color=b3MakeVector4(0,1,0,1);
|
||||
b3Vector4 scaling=b3MakeVector4(1,1,1,1);
|
||||
bool large = false;
|
||||
|
||||
if (curUseLargeAabbOption)
|
||||
{
|
||||
if (i==0 && j==0 && k==0)
|
||||
{
|
||||
large = true;
|
||||
scaling[0] = 1000;
|
||||
scaling[1] = 1000;
|
||||
scaling[2] = 1000;
|
||||
}
|
||||
}
|
||||
/*if (j==0)
|
||||
{
|
||||
large=true;
|
||||
scaling[1] = 10000;
|
||||
}
|
||||
if (k==0)
|
||||
{
|
||||
large=true;
|
||||
scaling[2] = 10000;
|
||||
}*/
|
||||
|
||||
|
||||
int id = m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling);
|
||||
b3Vector3 aabbHalfExtents=b3MakeVector3(1,1,1);
|
||||
|
||||
b3Vector3 aabbMin = position-aabbHalfExtents;
|
||||
b3Vector3 aabbMax = position+aabbHalfExtents;
|
||||
|
||||
|
||||
b3Vector3 aabbMin = position-scaling;
|
||||
b3Vector3 aabbMax = position+scaling;
|
||||
|
||||
|
||||
m_data->m_broadphaseGPU->createProxy(aabbMin,aabbMax,index,group,mask);
|
||||
if (large)
|
||||
{
|
||||
m_data->m_broadphaseGPU->createLargeProxy(aabbMin,aabbMax,index,group,mask);
|
||||
|
||||
} else
|
||||
{
|
||||
m_data->m_broadphaseGPU->createProxy(aabbMin,aabbMax,index,group,mask);
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
@@ -674,10 +774,15 @@ void PairBench::clientMoveAndDisplay()
|
||||
//color all objects blue
|
||||
|
||||
bool animate=true;
|
||||
int numObjects= m_instancingRenderer->getInternalData()->m_totalNumInstances;
|
||||
int numObjects= 0;
|
||||
{
|
||||
B3_PROFILE("Num Objects");
|
||||
numObjects = m_instancingRenderer->getInternalData()->m_totalNumInstances;
|
||||
}
|
||||
b3Vector4* positions = 0;
|
||||
if (numObjects)
|
||||
{
|
||||
B3_PROFILE("Sync");
|
||||
GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo;
|
||||
|
||||
|
||||
@@ -686,7 +791,11 @@ void PairBench::clientMoveAndDisplay()
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
||||
cl_bool blocking= CL_TRUE;
|
||||
char* hostPtr= (char*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_WRITE_BIT|GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY
|
||||
char* hostPtr= 0;
|
||||
{
|
||||
B3_PROFILE("glMapBufferRange");
|
||||
hostPtr = (char*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_WRITE_BIT|GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY
|
||||
}
|
||||
GLint err = glGetError();
|
||||
assert(err==GL_NO_ERROR);
|
||||
positions = (b3Vector4*)hostPtr;
|
||||
@@ -716,71 +825,82 @@ void PairBench::clientMoveAndDisplay()
|
||||
{
|
||||
if (1)
|
||||
{
|
||||
if (1)
|
||||
{
|
||||
|
||||
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_sineWaveKernel,"m_sineWaveKernel");
|
||||
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
|
||||
launcher.setBuffer(m_data->m_bodyTimes->getBufferCL() );
|
||||
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_sineWaveKernel,"m_sineWaveKernel");
|
||||
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
|
||||
launcher.setBuffer(m_data->m_bodyTimes->getBufferCL() );
|
||||
|
||||
launcher.setConst(timeStepPos);
|
||||
launcher.setConst(mAmplitude);
|
||||
launcher.setConst( numObjects);
|
||||
launcher.launch1D( numObjects);
|
||||
clFinish(m_clData->m_clQueue);
|
||||
}
|
||||
else
|
||||
{
|
||||
launcher.setConst(timeStepPos);
|
||||
launcher.setConst(mAmplitude);
|
||||
launcher.setConst( numObjects);
|
||||
launcher.launch1D( numObjects);
|
||||
clFinish(m_clData->m_clQueue);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_moveObjectsKernel,"m_moveObjectsKernel");
|
||||
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
|
||||
launcher.setConst( numObjects);
|
||||
launcher.launch1D( numObjects);
|
||||
clFinish(m_clData->m_clQueue);
|
||||
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_moveObjectsKernel,"m_moveObjectsKernel");
|
||||
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
|
||||
launcher.setConst( numObjects);
|
||||
launcher.launch1D( numObjects);
|
||||
clFinish(m_clData->m_clQueue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool updateOnGpu=true;
|
||||
|
||||
if (updateOnGpu)
|
||||
if (1)
|
||||
{
|
||||
B3_PROFILE("updateOnGpu");
|
||||
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple,"m_updateAabbSimple");
|
||||
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
|
||||
launcher.setConst( numObjects);
|
||||
launcher.setBuffer(m_data->m_broadphaseGPU->getAabbBufferWS());
|
||||
launcher.launch1D( numObjects);
|
||||
clFinish(m_clData->m_clQueue);
|
||||
|
||||
} else
|
||||
{
|
||||
B3_PROFILE("updateOnCpu");
|
||||
if (!gPairBenchFileName)
|
||||
if (updateOnGpu)
|
||||
{
|
||||
int allAabbs = m_data->m_broadphaseGPU->getAllAabbsCPU().size();
|
||||
B3_PROFILE("updateOnGpu");
|
||||
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple,"m_updateAabbSimple");
|
||||
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
|
||||
launcher.setConst( numObjects);
|
||||
launcher.setBuffer(m_data->m_broadphaseGPU->getAabbBufferWS());
|
||||
launcher.launch1D( numObjects);
|
||||
clFinish(m_clData->m_clQueue);
|
||||
|
||||
} else
|
||||
{
|
||||
B3_PROFILE("updateOnCpu");
|
||||
if (!gPairBenchFileName)
|
||||
{
|
||||
int allAabbs = m_data->m_broadphaseGPU->getAllAabbsCPU().size();
|
||||
|
||||
|
||||
b3AlignedObjectArray<b3Vector4> posOrnColorsCpu;
|
||||
if (m_data->m_instancePosOrnColor)
|
||||
m_data->m_instancePosOrnColor->copyToHost(posOrnColorsCpu);
|
||||
b3AlignedObjectArray<b3Vector4> posOrnColorsCpu;
|
||||
if (m_data->m_instancePosOrnColor)
|
||||
m_data->m_instancePosOrnColor->copyToHost(posOrnColorsCpu);
|
||||
|
||||
|
||||
|
||||
for (int nodeId=0;nodeId<numObjects;nodeId++)
|
||||
{
|
||||
for (int nodeId=0;nodeId<numObjects;nodeId++)
|
||||
{
|
||||
b3Vector3 position = posOrnColorsCpu[nodeId];
|
||||
b3Vector3 halfExtents = b3MakeFloat4(1.01f,1.01f,1.01f,0.f);
|
||||
m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_minVec = position-halfExtents;
|
||||
m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_minIndices[3] = nodeId;
|
||||
m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_maxVec = position+halfExtents;
|
||||
m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_signedMaxIndices[3]= nodeId;
|
||||
{
|
||||
b3Vector3 position = posOrnColorsCpu[nodeId];
|
||||
|
||||
b3SapAabb orgAabb = m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId];
|
||||
b3Vector3 halfExtents = 0.5f*(orgAabb.m_maxVec-orgAabb.m_minVec);
|
||||
int orgNodeIndex = orgAabb.m_minIndices[3];
|
||||
int orgBroadphaseIndex = orgAabb.m_signedMaxIndices[3];
|
||||
|
||||
m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_minVec = position-halfExtents;
|
||||
m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_minIndices[3] = orgNodeIndex;
|
||||
m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_maxVec = position+halfExtents;
|
||||
m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_signedMaxIndices[3]= orgBroadphaseIndex;
|
||||
}
|
||||
}
|
||||
m_data->m_broadphaseGPU->writeAabbsToGpu();
|
||||
}
|
||||
}
|
||||
m_data->m_broadphaseGPU->writeAabbsToGpu();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
unsigned long dt = 0;
|
||||
@@ -791,15 +911,20 @@ void PairBench::clientMoveAndDisplay()
|
||||
B3_PROFILE("calculateOverlappingPairs");
|
||||
int sz = sizeof(b3Int4)*64*numObjects;
|
||||
|
||||
m_data->m_broadphaseGPU->calculateOverlappingPairs(16*numObjects);
|
||||
int prealloc = 3*1024*1024;
|
||||
|
||||
int maxOverlap = b3Min(prealloc,16*numObjects);
|
||||
|
||||
m_data->m_broadphaseGPU->calculateOverlappingPairs(maxOverlap);
|
||||
int numPairs = m_data->m_broadphaseGPU->getNumOverlap();
|
||||
printf("numPairs = %d\n", numPairs);
|
||||
//printf("numPairs = %d\n", numPairs);
|
||||
dt = cl.getTimeMicroseconds()-dt;
|
||||
}
|
||||
|
||||
|
||||
if (m_data->m_gui)
|
||||
{
|
||||
B3_PROFILE("update Gui");
|
||||
int allAabbs = m_data->m_broadphaseGPU->getAllAabbsCPU().size();
|
||||
int numOverlap = m_data->m_broadphaseGPU->getNumOverlap();
|
||||
|
||||
@@ -836,6 +961,8 @@ void PairBench::clientMoveAndDisplay()
|
||||
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
|
||||
launcher.setConst( numObjects);
|
||||
launcher.setBuffer( pairBuf);
|
||||
int indexOffset = TEST_INDEX_OFFSET;
|
||||
launcher.setConst(indexOffset);
|
||||
launcher.setConst( numPairs);
|
||||
launcher.launch1D( numPairs);
|
||||
clFinish(m_clData->m_clQueue);
|
||||
|
||||
@@ -12,15 +12,15 @@ __kernel void moveObjectsKernel(__global float4* posOrnColors, int numObjects)
|
||||
colors[iGID] = (float4)(0,0,1,1);
|
||||
}
|
||||
|
||||
__kernel void colorPairsKernel(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int numPairs)
|
||||
__kernel void colorPairsKernel2(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int indexOffset, int numPairs)
|
||||
{
|
||||
int iPairId = get_global_id(0);
|
||||
if (iPairId>=numPairs)
|
||||
return;
|
||||
__global float4* colors = &posOrnColors[numObjects*2];
|
||||
|
||||
int iObjectA = pairs[iPairId].x;
|
||||
int iObjectB = pairs[iPairId].y;
|
||||
int iObjectA = pairs[iPairId].x-indexOffset;
|
||||
int iObjectB = pairs[iPairId].y-indexOffset;
|
||||
colors[iObjectA] = (float4)(1,0,0,1);
|
||||
colors[iObjectB] = (float4)(1,0,0,1);
|
||||
}
|
||||
@@ -56,15 +56,24 @@ __kernel void updateAabbSimple( __global float4* posOrnColors, const int numNode
|
||||
int nodeId = get_global_id(0);
|
||||
if( nodeId < numNodes )
|
||||
{
|
||||
|
||||
b3AABBCL orgAabbMin = pAABB[nodeId*2];
|
||||
b3AABBCL orgAabbMax = pAABB[nodeId*2+1];
|
||||
int orgNodeId = orgAabbMin.uw;
|
||||
int orgBroadphaseIndex = orgAabbMax.uw;
|
||||
|
||||
float4 position = posOrnColors[nodeId];
|
||||
float4 halfExtents = (float4)(1.01f,1.01f,1.01f,0.f);
|
||||
float4 argAabbMinVec = (float4)(orgAabbMin.fx,orgAabbMin.fy,orgAabbMin.fz,0.f);
|
||||
float4 argAabbMaxVec = (float4)(orgAabbMax.fx,orgAabbMax.fy,orgAabbMax.fz,0.f);
|
||||
float4 halfExtents = 0.5f*(argAabbMaxVec-argAabbMinVec);
|
||||
|
||||
pAABB[nodeId*2].fx = position.x-halfExtents.x;
|
||||
pAABB[nodeId*2].fy = position.y-halfExtents.y;
|
||||
pAABB[nodeId*2].fz = position.z-halfExtents.z;
|
||||
pAABB[nodeId*2].uw = nodeId;
|
||||
pAABB[nodeId*2].uw = orgNodeId;
|
||||
pAABB[nodeId*2+1].fx = position.x+halfExtents.x;
|
||||
pAABB[nodeId*2+1].fy = position.y+halfExtents.y;
|
||||
pAABB[nodeId*2+1].fz = position.z+halfExtents.z;
|
||||
pAABB[nodeId*2+1].uw = nodeId;
|
||||
pAABB[nodeId*2+1].uw = orgBroadphaseIndex;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,14 +13,14 @@ static const char* pairsKernelsCL= \
|
||||
" __global float4* colors = &posOrnColors[numObjects*2];\n"
|
||||
" colors[iGID] = (float4)(0,0,1,1);\n"
|
||||
"}\n"
|
||||
"__kernel void colorPairsKernel(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int numPairs)\n"
|
||||
"__kernel void colorPairsKernel2(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int indexOffset, int numPairs)\n"
|
||||
"{\n"
|
||||
" int iPairId = get_global_id(0);\n"
|
||||
" if (iPairId>=numPairs)\n"
|
||||
" return;\n"
|
||||
" __global float4* colors = &posOrnColors[numObjects*2];\n"
|
||||
" int iObjectA = pairs[iPairId].x;\n"
|
||||
" int iObjectB = pairs[iPairId].y;\n"
|
||||
" int iObjectA = pairs[iPairId].x-indexOffset;\n"
|
||||
" int iObjectB = pairs[iPairId].y-indexOffset;\n"
|
||||
" colors[iObjectA] = (float4)(1,0,0,1);\n"
|
||||
" colors[iObjectB] = (float4)(1,0,0,1);\n"
|
||||
"}\n"
|
||||
@@ -53,16 +53,25 @@ static const char* pairsKernelsCL= \
|
||||
" int nodeId = get_global_id(0);\n"
|
||||
" if( nodeId < numNodes )\n"
|
||||
" {\n"
|
||||
" \n"
|
||||
" b3AABBCL orgAabbMin = pAABB[nodeId*2];\n"
|
||||
" b3AABBCL orgAabbMax = pAABB[nodeId*2+1];\n"
|
||||
" int orgNodeId = orgAabbMin.uw;\n"
|
||||
" int orgBroadphaseIndex = orgAabbMax.uw;\n"
|
||||
" \n"
|
||||
" float4 position = posOrnColors[nodeId];\n"
|
||||
" float4 halfExtents = (float4)(1.01f,1.01f,1.01f,0.f);\n"
|
||||
" float4 argAabbMinVec = (float4)(orgAabbMin.fx,orgAabbMin.fy,orgAabbMin.fz,0.f);\n"
|
||||
" float4 argAabbMaxVec = (float4)(orgAabbMax.fx,orgAabbMax.fy,orgAabbMax.fz,0.f);\n"
|
||||
" float4 halfExtents = 0.5f*(argAabbMaxVec-argAabbMinVec);\n"
|
||||
" \n"
|
||||
" pAABB[nodeId*2].fx = position.x-halfExtents.x;\n"
|
||||
" pAABB[nodeId*2].fy = position.y-halfExtents.y;\n"
|
||||
" pAABB[nodeId*2].fz = position.z-halfExtents.z;\n"
|
||||
" pAABB[nodeId*2].uw = nodeId;\n"
|
||||
" pAABB[nodeId*2].uw = orgNodeId;\n"
|
||||
" pAABB[nodeId*2+1].fx = position.x+halfExtents.x;\n"
|
||||
" pAABB[nodeId*2+1].fy = position.y+halfExtents.y;\n"
|
||||
" pAABB[nodeId*2+1].fz = position.z+halfExtents.z;\n"
|
||||
" pAABB[nodeId*2+1].uw = nodeId; \n"
|
||||
" pAABB[nodeId*2+1].uw = orgBroadphaseIndex; \n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
;
|
||||
|
||||
@@ -116,7 +116,7 @@ void b3GpuGridBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3
|
||||
aabb.m_minVec = aabbMin;
|
||||
aabb.m_maxVec = aabbMax;
|
||||
aabb.m_minIndices[3] = userPtr;
|
||||
aabb.m_signedMaxIndices[3] = userPtr;
|
||||
aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr;
|
||||
m_allAabbsCPU1.push_back(aabb);
|
||||
m_smallAabbsCPU.push_back(aabb);
|
||||
}
|
||||
@@ -126,7 +126,7 @@ void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Ve
|
||||
aabb.m_minVec = aabbMin;
|
||||
aabb.m_maxVec = aabbMax;
|
||||
aabb.m_minIndices[3] = userPtr;
|
||||
aabb.m_signedMaxIndices[3] = userPtr;
|
||||
aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr;
|
||||
m_allAabbsCPU1.push_back(aabb);
|
||||
m_largeAabbsCPU.push_back(aabb);
|
||||
}
|
||||
@@ -139,7 +139,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
if (0)
|
||||
{
|
||||
calculateOverlappingPairsHost(maxPairs);
|
||||
|
||||
/*
|
||||
b3AlignedObjectArray<b3Int4> cpuPairs;
|
||||
m_gpuPairs.copyToHost(cpuPairs);
|
||||
printf("host m_gpuPairs.size()=%d\n",m_gpuPairs.size());
|
||||
@@ -147,25 +147,53 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
{
|
||||
printf("host pair %d = %d,%d\n",i,cpuPairs[i].x,cpuPairs[i].y);
|
||||
}
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
//sync small AABBs
|
||||
{
|
||||
int numSmallAabbs = m_smallAabbsGPU.size();
|
||||
if (numSmallAabbs)
|
||||
{
|
||||
B3_PROFILE("copyAabbsKernelSmall");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( m_allAabbsGPU1.getBufferCL(), true ),
|
||||
b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()),
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_copyAabbsKernel,"m_copyAabbsKernel" );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numSmallAabbs );
|
||||
int num = numSmallAabbs;
|
||||
launcher.launch1D( num);
|
||||
|
||||
bool syncOnHost = false;
|
||||
if (syncOnHost)
|
||||
{
|
||||
m_allAabbsGPU1.copyToHost(this->m_allAabbsCPU1);
|
||||
b3AlignedObjectArray<b3SapAabb> hostSmallAabbs;
|
||||
m_smallAabbsGPU.copyToHost(hostSmallAabbs);
|
||||
int numSmallAabbs = hostSmallAabbs.size();
|
||||
for (int i=0;i<numSmallAabbs;i++)
|
||||
{
|
||||
//__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)
|
||||
{
|
||||
//int i = get_global_id(0);
|
||||
//if (i>=numObjects)
|
||||
// return;
|
||||
int src = hostSmallAabbs[i].m_signedMaxIndices[3];
|
||||
hostSmallAabbs[i] = m_allAabbsCPU1[src];
|
||||
hostSmallAabbs[i].m_signedMaxIndices[3] = src;
|
||||
}
|
||||
}
|
||||
m_smallAabbsGPU.copyFromHost(hostSmallAabbs);
|
||||
} else
|
||||
{
|
||||
int numSmallAabbs = m_smallAabbsGPU.size();
|
||||
if (numSmallAabbs)
|
||||
{
|
||||
B3_PROFILE("copyAabbsKernelSmall");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( m_allAabbsGPU1.getBufferCL(), true ),
|
||||
b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()),
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_copyAabbsKernel,"m_copyAabbsKernel" );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numSmallAabbs );
|
||||
int num = numSmallAabbs;
|
||||
launcher.launch1D( num);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//sync large AABBs
|
||||
@@ -328,19 +356,19 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
}
|
||||
void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
{
|
||||
#if 0
|
||||
|
||||
m_hostPairs.resize(0);
|
||||
m_allAabbsGPU1.copyToHost(m_allAabbsCPU1);
|
||||
for (int i=0;i<m_allAabbsCPU.size();i++)
|
||||
for (int i=0;i<m_allAabbsCPU1.size();i++)
|
||||
{
|
||||
for (int j=i+1;j<m_allAabbsCPU.size();j++)
|
||||
for (int j=i+1;j<m_allAabbsCPU1.size();j++)
|
||||
{
|
||||
if (b3TestAabbAgainstAabb2(m_allAabbsCPU[i].m_minVec, m_allAabbsCPU[i].m_maxVec,
|
||||
m_allAabbsCPU[j].m_minVec,m_allAabbsCPU[j].m_maxVec))
|
||||
if (b3TestAabbAgainstAabb2(m_allAabbsCPU1[i].m_minVec, m_allAabbsCPU1[i].m_maxVec,
|
||||
m_allAabbsCPU1[j].m_minVec,m_allAabbsCPU1[j].m_maxVec))
|
||||
{
|
||||
b3Int4 pair;
|
||||
int a = m_allAabbsCPU[j].m_minIndices[3];
|
||||
int b = m_allAabbsCPU[i].m_minIndices[3];
|
||||
int a = m_allAabbsCPU1[j].m_minIndices[3];
|
||||
int b = m_allAabbsCPU1[i].m_minIndices[3];
|
||||
if (a<=b)
|
||||
{
|
||||
pair.x = a;
|
||||
@@ -361,7 +389,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
|
||||
|
||||
m_gpuPairs.copyFromHost(m_hostPairs);
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ bool searchIncremental3dSapOnGpu = true;
|
||||
#define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl"
|
||||
#define B3_BROADPHASE_SAPFAST_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFast.cl"
|
||||
|
||||
b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q )
|
||||
b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q , b3GpuSapKernelType kernelType)
|
||||
:m_context(ctx),
|
||||
m_device(device),
|
||||
m_queue(q),
|
||||
@@ -64,10 +64,50 @@ m_removedCountGPU(ctx,q)
|
||||
#else
|
||||
m_prefixScanFloat4 = 0;
|
||||
#endif
|
||||
//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
|
||||
//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg );
|
||||
//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
|
||||
m_sapKernel = 0;
|
||||
|
||||
switch (kernelType)
|
||||
{
|
||||
case B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU:
|
||||
{
|
||||
m_sapKernel=0;
|
||||
break;
|
||||
}
|
||||
case B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU:
|
||||
{
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBruteForce",&errNum,sapProg );
|
||||
break;
|
||||
}
|
||||
|
||||
case B3_GPU_SAP_KERNEL_ORIGINAL:
|
||||
{
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
|
||||
break;
|
||||
}
|
||||
case B3_GPU_SAP_KERNEL_BARRIER:
|
||||
{
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg );
|
||||
break;
|
||||
}
|
||||
case B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY:
|
||||
{
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
|
||||
break;
|
||||
}
|
||||
|
||||
case B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY_BATCH_WRITE:
|
||||
{
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsKernelLocalSharedMemoryBatchWrite",&errNum,sapFastProg );
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
|
||||
b3Error("Unknown 3D GPU SAP provided, fallback to computePairsKernelLocalSharedMemory");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg );
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
@@ -78,21 +118,21 @@ m_removedCountGPU(ctx,q)
|
||||
m_computePairsIncremental3dSapKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsIncremental3dSapKernel",&errNum,sapFastProg );
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
|
||||
/*
|
||||
#if 0
|
||||
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
#else
|
||||
#ifndef __APPLE__
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsKernel",&errNum,sapFastProg );
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsKernelLocalSharedMemoryBatchWrite",&errNum,sapFastProg );
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
#else
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
*/
|
||||
m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "flipFloatKernel",&errNum,sapProg );
|
||||
|
||||
m_copyAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg );
|
||||
@@ -980,7 +1020,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
m_overlappingPairs.resize(0);
|
||||
}
|
||||
|
||||
init3dSap();
|
||||
//init3dSap();
|
||||
|
||||
}
|
||||
|
||||
@@ -999,9 +1039,15 @@ void b3GpuSapBroadphase::reset()
|
||||
|
||||
void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
{
|
||||
if (m_sapKernel==0)
|
||||
{
|
||||
calculateOverlappingPairsHost(maxPairs);
|
||||
return;
|
||||
}
|
||||
//if (m_currentBuffer>=0)
|
||||
// return calculateOverlappingPairsHostIncremental3Sap();
|
||||
|
||||
//calculateOverlappingPairsHost(maxPairs);
|
||||
|
||||
B3_PROFILE("GPU 1-axis SAP calculateOverlappingPairs");
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ class b3Vector3;
|
||||
|
||||
#include "b3GpuBroadphaseInterface.h"
|
||||
|
||||
|
||||
class b3GpuSapBroadphase : public b3GpuBroadphaseInterface
|
||||
{
|
||||
|
||||
@@ -87,12 +88,44 @@ public:
|
||||
|
||||
class b3PrefixScanFloat4CL* m_prefixScanFloat4;
|
||||
|
||||
b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q );
|
||||
enum b3GpuSapKernelType
|
||||
{
|
||||
B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU=1,
|
||||
B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU,
|
||||
B3_GPU_SAP_KERNEL_ORIGINAL,
|
||||
B3_GPU_SAP_KERNEL_BARRIER,
|
||||
B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY,
|
||||
B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY_BATCH_WRITE
|
||||
};
|
||||
|
||||
b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q , b3GpuSapKernelType kernelType=B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
|
||||
virtual ~b3GpuSapBroadphase();
|
||||
|
||||
static b3GpuBroadphaseInterface* CreateFunc(cl_context ctx,cl_device_id device, cl_command_queue q)
|
||||
static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx,cl_device_id device, cl_command_queue q)
|
||||
{
|
||||
return new b3GpuSapBroadphase(ctx,device,q);
|
||||
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU);
|
||||
}
|
||||
|
||||
static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx,cl_device_id device, cl_command_queue q)
|
||||
{
|
||||
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU);
|
||||
}
|
||||
|
||||
static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx,cl_device_id device, cl_command_queue q)
|
||||
{
|
||||
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_ORIGINAL);
|
||||
}
|
||||
static b3GpuBroadphaseInterface* CreateFuncBarrier(cl_context ctx,cl_device_id device, cl_command_queue q)
|
||||
{
|
||||
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BARRIER);
|
||||
}
|
||||
static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx,cl_device_id device, cl_command_queue q)
|
||||
{
|
||||
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
|
||||
}
|
||||
static b3GpuBroadphaseInterface* CreateFuncLocalMemoryBatchWrite(cl_context ctx,cl_device_id device, cl_command_queue q)
|
||||
{
|
||||
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY_BATCH_WRITE);
|
||||
}
|
||||
|
||||
virtual void calculateOverlappingPairs(int maxPairs);
|
||||
|
||||
@@ -100,6 +100,32 @@ __kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAa
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
if (i>=numObjects)
|
||||
return;
|
||||
for (int j=i+1;j<numObjects;j++)
|
||||
{
|
||||
if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
|
||||
{
|
||||
int4 myPair;
|
||||
myPair.x = aabbs[i].m_minIndices[3];
|
||||
myPair.y = aabbs[j].m_minIndices[3];
|
||||
myPair.z = NEW_PAIR_MARKER;
|
||||
myPair.w = NEW_PAIR_MARKER;
|
||||
|
||||
int curPair = atomic_inc (pairCount);
|
||||
if (curPair<maxPairs)
|
||||
{
|
||||
pairsOut[curPair] = myPair; //flush to main memory
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
|
||||
@@ -328,8 +328,8 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
|
||||
|
||||
}
|
||||
|
||||
//computePairsKernelBatchWrite
|
||||
__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
|
||||
__kernel void computePairsKernelLocalSharedMemoryBatchWrite( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
int localId = get_local_id(0);
|
||||
|
||||
@@ -296,8 +296,7 @@ static const char* sapFastCL= \
|
||||
" }//for (int otherbuffer\n"
|
||||
" }//for (int axis=0;\n"
|
||||
"}\n"
|
||||
"//computePairsKernelBatchWrite\n"
|
||||
"__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernelLocalSharedMemoryBatchWrite( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int localId = get_local_id(0);\n"
|
||||
|
||||
@@ -88,6 +88,28 @@ static const char* sapCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numObjects)\n"
|
||||
" return;\n"
|
||||
" for (int j=i+1;j<numObjects;j++)\n"
|
||||
" {\n"
|
||||
" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
|
||||
" {\n"
|
||||
" int4 myPair;\n"
|
||||
" myPair.x = aabbs[i].m_minIndices[3];\n"
|
||||
" myPair.y = aabbs[j].m_minIndices[3];\n"
|
||||
" myPair.z = NEW_PAIR_MARKER;\n"
|
||||
" myPair.w = NEW_PAIR_MARKER;\n"
|
||||
" int curPair = atomic_inc (pairCount);\n"
|
||||
" if (curPair<maxPairs)\n"
|
||||
" {\n"
|
||||
" pairsOut[curPair] = myPair; //flush to main memory\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
|
||||
Reference in New Issue
Block a user