bt -> b3 and BT -> B3 rename for content and filenames
This commit is contained in:
@@ -13,8 +13,8 @@ subject to the following restrictions:
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef BT_OPENCL_INCLUDE_H
|
||||
#define BT_OPENCL_INCLUDE_H
|
||||
#ifndef B3_OPENCL_INCLUDE_H
|
||||
#define B3_OPENCL_INCLUDE_H
|
||||
|
||||
|
||||
#ifdef __APPLE__
|
||||
@@ -40,5 +40,5 @@ subject to the following restrictions:
|
||||
#define oclCHECKERROR(a, b) if((a)!=(b)) { printf("OCL Error : %d\n", (a)); assert((a) == (b)); }
|
||||
|
||||
|
||||
#endif //BT_OPENCL_INCLUDE_H
|
||||
#endif //B3_OPENCL_INCLUDE_H
|
||||
|
||||
|
||||
@@ -31,14 +31,14 @@ bool gDebugSkipLoadingBinary = false;
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define BT_MAX_CL_DEVICES 16 //who needs 16 devices?
|
||||
#define B3_MAX_CL_DEVICES 16 //who needs 16 devices?
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#define btAssert assert
|
||||
#define b3Assert assert
|
||||
|
||||
|
||||
//Set the preferred platform vendor using the OpenCL SDK
|
||||
@@ -78,14 +78,14 @@ void MyFatalBreakAPPLE( const char * errstr ,
|
||||
} else
|
||||
{
|
||||
printf("error\n");
|
||||
btAssert(0);
|
||||
b3Assert(0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
int btOpenCLUtils_getNumPlatforms(cl_int* pErrNum)
|
||||
int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum)
|
||||
{
|
||||
|
||||
cl_platform_id pPlatforms[10] = { 0 };
|
||||
@@ -103,12 +103,12 @@ int btOpenCLUtils_getNumPlatforms(cl_int* pErrNum)
|
||||
|
||||
}
|
||||
|
||||
const char* btOpenCLUtils_getSdkVendorName()
|
||||
const char* b3OpenCLUtils_getSdkVendorName()
|
||||
{
|
||||
return spPlatformVendor;
|
||||
}
|
||||
|
||||
cl_platform_id btOpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum)
|
||||
cl_platform_id b3OpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum)
|
||||
{
|
||||
cl_platform_id platform = 0;
|
||||
unsigned int platformIndex = (unsigned int )platformIndex0;
|
||||
@@ -134,20 +134,20 @@ cl_platform_id btOpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum)
|
||||
return platform;
|
||||
}
|
||||
|
||||
void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo* platformInfo)
|
||||
void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo)
|
||||
{
|
||||
cl_int ciErrNum;
|
||||
ciErrNum = clGetPlatformInfo( platform,CL_PLATFORM_VENDOR,BT_MAX_STRING_LENGTH,platformInfo->m_platformVendor,NULL);
|
||||
ciErrNum = clGetPlatformInfo( platform,CL_PLATFORM_VENDOR,B3_MAX_STRING_LENGTH,platformInfo->m_platformVendor,NULL);
|
||||
oclCHECKERROR(ciErrNum,CL_SUCCESS);
|
||||
ciErrNum = clGetPlatformInfo( platform,CL_PLATFORM_NAME,BT_MAX_STRING_LENGTH,platformInfo->m_platformName,NULL);
|
||||
ciErrNum = clGetPlatformInfo( platform,CL_PLATFORM_NAME,B3_MAX_STRING_LENGTH,platformInfo->m_platformName,NULL);
|
||||
oclCHECKERROR(ciErrNum,CL_SUCCESS);
|
||||
ciErrNum = clGetPlatformInfo( platform,CL_PLATFORM_VERSION,BT_MAX_STRING_LENGTH,platformInfo->m_platformVersion,NULL);
|
||||
ciErrNum = clGetPlatformInfo( platform,CL_PLATFORM_VERSION,B3_MAX_STRING_LENGTH,platformInfo->m_platformVersion,NULL);
|
||||
oclCHECKERROR(ciErrNum,CL_SUCCESS);
|
||||
}
|
||||
|
||||
void btOpenCLUtils_printPlatformInfo(cl_platform_id platform)
|
||||
void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform)
|
||||
{
|
||||
btOpenCLPlatformInfo platformInfo;
|
||||
b3OpenCLPlatformInfo platformInfo;
|
||||
b3OpenCLUtils::getPlatformInfo (platform, &platformInfo);
|
||||
printf("Platform info:\n");
|
||||
printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
|
||||
@@ -157,12 +157,12 @@ void btOpenCLUtils_printPlatformInfo(cl_platform_id platform)
|
||||
|
||||
|
||||
|
||||
cl_context btOpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex)
|
||||
cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex)
|
||||
{
|
||||
cl_context retContext = 0;
|
||||
cl_int ciErrNum=0;
|
||||
cl_uint num_entries;
|
||||
cl_device_id devices[BT_MAX_CL_DEVICES];
|
||||
cl_device_id devices[B3_MAX_CL_DEVICES];
|
||||
cl_uint num_devices;
|
||||
cl_context_properties* cprops;
|
||||
|
||||
@@ -182,7 +182,7 @@ cl_context btOpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_d
|
||||
cps[5] = (cl_context_properties)pGLDC;
|
||||
}
|
||||
#endif //_WIN32
|
||||
num_entries = BT_MAX_CL_DEVICES;
|
||||
num_entries = B3_MAX_CL_DEVICES;
|
||||
|
||||
|
||||
num_devices=-1;
|
||||
@@ -241,7 +241,7 @@ cl_context btOpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_d
|
||||
return retContext;
|
||||
}
|
||||
|
||||
cl_context btOpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC , int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* retPlatformId)
|
||||
cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC , int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* retPlatformId)
|
||||
{
|
||||
cl_uint numPlatforms;
|
||||
cl_context retContext = 0;
|
||||
@@ -303,12 +303,12 @@ cl_context btOpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int
|
||||
cl_platform_id platform = platforms[i];
|
||||
assert(platform);
|
||||
|
||||
retContext = btOpenCLUtils_createContextFromPlatform(platform,deviceType,pErrNum,pGLContext,pGLDC,preferredDeviceIndex,preferredPlatformIndex);
|
||||
retContext = b3OpenCLUtils_createContextFromPlatform(platform,deviceType,pErrNum,pGLContext,pGLDC,preferredDeviceIndex,preferredPlatformIndex);
|
||||
|
||||
if (retContext)
|
||||
{
|
||||
// printf("OpenCL platform details:\n");
|
||||
btOpenCLPlatformInfo platformInfo;
|
||||
b3OpenCLPlatformInfo platformInfo;
|
||||
|
||||
b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
|
||||
|
||||
@@ -332,7 +332,7 @@ cl_context btOpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int
|
||||
//! @param cxMainContext OpenCL context
|
||||
//! @param device_idx index of the device of interest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
cl_device_id btOpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex)
|
||||
cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex)
|
||||
{
|
||||
assert(cxMainContext);
|
||||
|
||||
@@ -357,7 +357,7 @@ cl_device_id btOpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex)
|
||||
return device;
|
||||
}
|
||||
|
||||
int btOpenCLUtils_getNumDevices(cl_context cxMainContext)
|
||||
int b3OpenCLUtils_getNumDevices(cl_context cxMainContext)
|
||||
{
|
||||
size_t szParamDataBytes;
|
||||
int device_count;
|
||||
@@ -368,16 +368,16 @@ int btOpenCLUtils_getNumDevices(cl_context cxMainContext)
|
||||
|
||||
|
||||
|
||||
void b3OpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo* info)
|
||||
void b3OpenCLUtils::getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info)
|
||||
{
|
||||
// CL_DEVICE_NAME
|
||||
clGetDeviceInfo(device, CL_DEVICE_NAME, BT_MAX_STRING_LENGTH, &info->m_deviceName, NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_NAME, B3_MAX_STRING_LENGTH, &info->m_deviceName, NULL);
|
||||
|
||||
// CL_DEVICE_VENDOR
|
||||
clGetDeviceInfo(device, CL_DEVICE_VENDOR, BT_MAX_STRING_LENGTH, &info->m_deviceVendor, NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_VENDOR, B3_MAX_STRING_LENGTH, &info->m_deviceVendor, NULL);
|
||||
|
||||
// CL_DRIVER_VERSION
|
||||
clGetDeviceInfo(device, CL_DRIVER_VERSION, BT_MAX_STRING_LENGTH, &info->m_driverVersion, NULL);
|
||||
clGetDeviceInfo(device, CL_DRIVER_VERSION, B3_MAX_STRING_LENGTH, &info->m_driverVersion, NULL);
|
||||
|
||||
// CL_DEVICE_INFO
|
||||
clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info->m_deviceType, NULL);
|
||||
@@ -438,7 +438,7 @@ void b3OpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo* info)
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info->m_image3dMaxDepth, NULL);
|
||||
|
||||
// CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines
|
||||
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, BT_MAX_STRING_LENGTH, &info->m_deviceExtensions, NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, B3_MAX_STRING_LENGTH, &info->m_deviceExtensions, NULL);
|
||||
|
||||
// CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info->m_vecWidthChar, NULL);
|
||||
@@ -450,9 +450,9 @@ void b3OpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo* info)
|
||||
}
|
||||
|
||||
|
||||
void btOpenCLUtils_printDeviceInfo(cl_device_id device)
|
||||
void b3OpenCLUtils_printDeviceInfo(cl_device_id device)
|
||||
{
|
||||
btOpenCLDeviceInfo info;
|
||||
b3OpenCLDeviceInfo info;
|
||||
b3OpenCLUtils::getDeviceInfo(device,&info);
|
||||
printf("Device Info:\n");
|
||||
printf(" CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName);
|
||||
@@ -521,7 +521,7 @@ static const char* strip2(const char* name, const char* pattern)
|
||||
return oriptr;
|
||||
}
|
||||
|
||||
cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg , const char* clFileNameForCaching, bool disableBinaryCaching)
|
||||
cl_program b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg , const char* clFileNameForCaching, bool disableBinaryCaching)
|
||||
{
|
||||
const char* additionalMacros = additionalMacrosArg?additionalMacrosArg:"";
|
||||
|
||||
@@ -534,7 +534,7 @@ cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_dev
|
||||
cl_int status;
|
||||
|
||||
#ifdef _WIN32
|
||||
char binaryFileName[BT_MAX_STRING_LENGTH];
|
||||
char binaryFileName[B3_MAX_STRING_LENGTH];
|
||||
char* bla=0;
|
||||
|
||||
if (clFileNameForCaching && !(disableBinaryCaching || gDebugSkipLoadingBinary||gDebugForceLoadingFromSource) )
|
||||
@@ -556,7 +556,7 @@ cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_dev
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
sprintf_s(binaryFileName,BT_MAX_STRING_LENGTH,"cache/%s.%s.%s.bin",strippedName, deviceName,driverVersion );
|
||||
sprintf_s(binaryFileName,B3_MAX_STRING_LENGTH,"cache/%s.%s.%s.bin",strippedName, deviceName,driverVersion );
|
||||
#else
|
||||
sprintf(binaryFileName,"cache/%s.%s.%s.bin",strippedName, deviceName,driverVersion );
|
||||
#endif
|
||||
@@ -701,9 +701,9 @@ cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_dev
|
||||
fclose( file );
|
||||
|
||||
m_cpProgram = clCreateProgramWithBinary( clContext, 1,&device, &binarySize, (const unsigned char**)&binary, 0, &status );
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
status = clBuildProgram( m_cpProgram, 1, &device, additionalMacros, 0, 0 );
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
|
||||
if( status != CL_SUCCESS )
|
||||
{
|
||||
@@ -715,7 +715,7 @@ cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_dev
|
||||
build_log[ret_val_size] = '\0';
|
||||
printf("%s\n", build_log);
|
||||
free (build_log);
|
||||
btAssert(0);
|
||||
b3Assert(0);
|
||||
m_cpProgram = 0;
|
||||
}
|
||||
free (binary);
|
||||
@@ -825,7 +825,7 @@ cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_dev
|
||||
|
||||
cl_uint numAssociatedDevices;
|
||||
status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0 );
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
if (numAssociatedDevices==1)
|
||||
{
|
||||
|
||||
@@ -833,12 +833,12 @@ cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_dev
|
||||
char* binary ;
|
||||
|
||||
status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 );
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
|
||||
binary = (char*)malloc(sizeof(char)*binarySize);
|
||||
|
||||
status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 );
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
|
||||
{
|
||||
FILE* file=0;
|
||||
@@ -870,7 +870,7 @@ cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_dev
|
||||
}
|
||||
|
||||
|
||||
cl_kernel btOpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros )
|
||||
cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros )
|
||||
{
|
||||
|
||||
cl_kernel kernel;
|
||||
@@ -882,7 +882,7 @@ cl_kernel btOpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_devic
|
||||
|
||||
if (!m_cpProgram)
|
||||
{
|
||||
m_cpProgram = btOpenCLUtils_compileCLProgramFromString(clContext,device,kernelSource,pErrNum, additionalMacros,0, false);
|
||||
m_cpProgram = b3OpenCLUtils_compileCLProgramFromString(clContext,device,kernelSource,pErrNum, additionalMacros,0, false);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -16,8 +16,8 @@ subject to the following restrictions:
|
||||
//original author: Roman Ponomarev
|
||||
//cleanup by Erwin Coumans
|
||||
|
||||
#ifndef BT_OPENCL_UTILS_H
|
||||
#define BT_OPENCL_UTILS_H
|
||||
#ifndef B3_OPENCL_UTILS_H
|
||||
#define B3_OPENCL_UTILS_H
|
||||
|
||||
#include "b3OpenCLInclude.h"
|
||||
|
||||
@@ -30,42 +30,42 @@ extern "C" {
|
||||
|
||||
/// CL Context optionally takes a GL context. This is a generic type because we don't really want this code
|
||||
/// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise.
|
||||
cl_context btOpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx , void* pGLDC , int preferredDeviceIndex , int preferredPlatformIndex, cl_platform_id* platformId);
|
||||
cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx , void* pGLDC , int preferredDeviceIndex , int preferredPlatformIndex, cl_platform_id* platformId);
|
||||
|
||||
int btOpenCLUtils_getNumDevices(cl_context cxMainContext);
|
||||
int b3OpenCLUtils_getNumDevices(cl_context cxMainContext);
|
||||
|
||||
cl_device_id btOpenCLUtils_getDevice(cl_context cxMainContext, int nr);
|
||||
cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int nr);
|
||||
|
||||
void btOpenCLUtils_printDeviceInfo(cl_device_id device);
|
||||
void b3OpenCLUtils_printDeviceInfo(cl_device_id device);
|
||||
|
||||
cl_kernel btOpenCLUtils_compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog,const char* additionalMacros);
|
||||
cl_kernel b3OpenCLUtils_compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog,const char* additionalMacros);
|
||||
|
||||
//optional
|
||||
cl_program btOpenCLUtils_compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum,const char* additionalMacros , const char* srcFileNameForCaching, bool disableBinaryCaching);
|
||||
cl_program b3OpenCLUtils_compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum,const char* additionalMacros , const char* srcFileNameForCaching, bool disableBinaryCaching);
|
||||
|
||||
//the following optional APIs provide access using specific platform information
|
||||
int btOpenCLUtils_getNumPlatforms(cl_int* pErrNum);
|
||||
int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum);
|
||||
|
||||
///get the nr'th platform, where nr is in the range [0..getNumPlatforms)
|
||||
cl_platform_id btOpenCLUtils_getPlatform(int nr, cl_int* pErrNum);
|
||||
cl_platform_id b3OpenCLUtils_getPlatform(int nr, cl_int* pErrNum);
|
||||
|
||||
void btOpenCLUtils_printPlatformInfo(cl_platform_id platform);
|
||||
void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform);
|
||||
|
||||
const char* btOpenCLUtils_getSdkVendorName();
|
||||
const char* b3OpenCLUtils_getSdkVendorName();
|
||||
|
||||
cl_context btOpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx , void* pGLDC ,int preferredDeviceIndex , int preferredPlatformIndex);
|
||||
cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx , void* pGLDC ,int preferredDeviceIndex , int preferredPlatformIndex);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#define BT_MAX_STRING_LENGTH 1024
|
||||
#define B3_MAX_STRING_LENGTH 1024
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char m_deviceName[BT_MAX_STRING_LENGTH];
|
||||
char m_deviceVendor[BT_MAX_STRING_LENGTH];
|
||||
char m_driverVersion[BT_MAX_STRING_LENGTH];
|
||||
char m_deviceExtensions[BT_MAX_STRING_LENGTH];
|
||||
char m_deviceName[B3_MAX_STRING_LENGTH];
|
||||
char m_deviceVendor[B3_MAX_STRING_LENGTH];
|
||||
char m_driverVersion[B3_MAX_STRING_LENGTH];
|
||||
char m_deviceExtensions[B3_MAX_STRING_LENGTH];
|
||||
|
||||
cl_device_type m_deviceType;
|
||||
cl_uint m_computeUnits;
|
||||
@@ -99,14 +99,14 @@ typedef struct
|
||||
cl_uint m_vecWidthFloat;
|
||||
cl_uint m_vecWidthDouble;
|
||||
|
||||
} btOpenCLDeviceInfo;
|
||||
} b3OpenCLDeviceInfo;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char m_platformVendor[BT_MAX_STRING_LENGTH];
|
||||
char m_platformName[BT_MAX_STRING_LENGTH];
|
||||
char m_platformVersion[BT_MAX_STRING_LENGTH];
|
||||
} btOpenCLPlatformInfo;
|
||||
char m_platformVendor[B3_MAX_STRING_LENGTH];
|
||||
char m_platformName[B3_MAX_STRING_LENGTH];
|
||||
char m_platformVersion[B3_MAX_STRING_LENGTH];
|
||||
} b3OpenCLPlatformInfo;
|
||||
|
||||
|
||||
///C++ API for OpenCL utilities: convenience functions
|
||||
@@ -116,64 +116,64 @@ struct b3OpenCLUtils
|
||||
/// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise.
|
||||
static inline cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0, int preferredDeviceIndex = -1, int preferredPlatformIndex= - 1, cl_platform_id* platformId=0)
|
||||
{
|
||||
return btOpenCLUtils_createContextFromType(deviceType, pErrNum, pGLCtx , pGLDC , preferredDeviceIndex, preferredPlatformIndex, platformId);
|
||||
return b3OpenCLUtils_createContextFromType(deviceType, pErrNum, pGLCtx , pGLDC , preferredDeviceIndex, preferredPlatformIndex, platformId);
|
||||
}
|
||||
|
||||
static inline int getNumDevices(cl_context cxMainContext)
|
||||
{
|
||||
return btOpenCLUtils_getNumDevices(cxMainContext);
|
||||
return b3OpenCLUtils_getNumDevices(cxMainContext);
|
||||
}
|
||||
static inline cl_device_id getDevice(cl_context cxMainContext, int nr)
|
||||
{
|
||||
return btOpenCLUtils_getDevice(cxMainContext,nr);
|
||||
return b3OpenCLUtils_getDevice(cxMainContext,nr);
|
||||
}
|
||||
|
||||
static void getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo* info);
|
||||
static void getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info);
|
||||
|
||||
static inline void printDeviceInfo(cl_device_id device)
|
||||
{
|
||||
btOpenCLUtils_printDeviceInfo(device);
|
||||
b3OpenCLUtils_printDeviceInfo(device);
|
||||
}
|
||||
|
||||
static inline cl_kernel compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum=0, cl_program prog=0,const char* additionalMacros = "" )
|
||||
{
|
||||
return btOpenCLUtils_compileCLKernelFromString(clContext,device, kernelSource, kernelName, pErrNum, prog,additionalMacros);
|
||||
return b3OpenCLUtils_compileCLKernelFromString(clContext,device, kernelSource, kernelName, pErrNum, prog,additionalMacros);
|
||||
}
|
||||
|
||||
//optional
|
||||
static inline cl_program compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum=0,const char* additionalMacros = "" , const char* srcFileNameForCaching=0, bool disableBinaryCaching=false)
|
||||
{
|
||||
return btOpenCLUtils_compileCLProgramFromString(clContext,device, kernelSource, pErrNum,additionalMacros, srcFileNameForCaching, disableBinaryCaching);
|
||||
return b3OpenCLUtils_compileCLProgramFromString(clContext,device, kernelSource, pErrNum,additionalMacros, srcFileNameForCaching, disableBinaryCaching);
|
||||
}
|
||||
|
||||
//the following optional APIs provide access using specific platform information
|
||||
static inline int getNumPlatforms(cl_int* pErrNum=0)
|
||||
{
|
||||
return btOpenCLUtils_getNumPlatforms(pErrNum);
|
||||
return b3OpenCLUtils_getNumPlatforms(pErrNum);
|
||||
}
|
||||
///get the nr'th platform, where nr is in the range [0..getNumPlatforms)
|
||||
static inline cl_platform_id getPlatform(int nr, cl_int* pErrNum=0)
|
||||
{
|
||||
return btOpenCLUtils_getPlatform(nr,pErrNum);
|
||||
return b3OpenCLUtils_getPlatform(nr,pErrNum);
|
||||
}
|
||||
|
||||
static void getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo* platformInfo);
|
||||
static void getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo);
|
||||
|
||||
static inline void printPlatformInfo(cl_platform_id platform)
|
||||
{
|
||||
btOpenCLUtils_printPlatformInfo(platform);
|
||||
b3OpenCLUtils_printPlatformInfo(platform);
|
||||
}
|
||||
|
||||
static inline const char* getSdkVendorName()
|
||||
{
|
||||
return btOpenCLUtils_getSdkVendorName();
|
||||
return b3OpenCLUtils_getSdkVendorName();
|
||||
}
|
||||
static inline cl_context createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0,int preferredDeviceIndex = -1, int preferredPlatformIndex= -1)
|
||||
{
|
||||
return btOpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLCtx,pGLDC,preferredDeviceIndex, preferredPlatformIndex);
|
||||
return b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLCtx,pGLDC,preferredDeviceIndex, preferredPlatformIndex);
|
||||
}
|
||||
};
|
||||
|
||||
#endif //__cplusplus
|
||||
|
||||
#endif // BT_OPENCL_UTILS_H
|
||||
#endif // B3_OPENCL_UTILS_H
|
||||
|
||||
@@ -37,7 +37,7 @@ int main(int argc, char* argv[])
|
||||
for (int i=0;i<numPlatforms;i++)
|
||||
{
|
||||
cl_platform_id platform = b3OpenCLUtils::getPlatform(i);
|
||||
btOpenCLPlatformInfo platformInfo;
|
||||
b3OpenCLPlatformInfo platformInfo;
|
||||
b3OpenCLUtils::getPlatformInfo(platform,&platformInfo);
|
||||
printf("--------------------------------\n");
|
||||
printf("Platform info for platform nr %d:\n",i);
|
||||
@@ -52,7 +52,7 @@ int main(int argc, char* argv[])
|
||||
for (int j=0;j<numDevices;j++)
|
||||
{
|
||||
cl_device_id dev = b3OpenCLUtils::getDevice(context,j);
|
||||
btOpenCLDeviceInfo devInfo;
|
||||
b3OpenCLDeviceInfo devInfo;
|
||||
b3OpenCLUtils::getDeviceInfo(dev,&devInfo);
|
||||
b3OpenCLUtils::printDeviceInfo(dev);
|
||||
}
|
||||
@@ -77,7 +77,7 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
cl_device_id device;
|
||||
device = b3OpenCLUtils::getDevice(g_cxMainContext,i);
|
||||
btOpenCLDeviceInfo clInfo;
|
||||
b3OpenCLDeviceInfo clInfo;
|
||||
b3OpenCLUtils::getDeviceInfo(device,&clInfo);
|
||||
b3OpenCLUtils::printDeviceInfo(device);
|
||||
// create a command-queue
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
#include "b3GpuSapBroadphase.h"
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
#include "parallel_primitives/host/btLauncherCL.h"
|
||||
#include "parallel_primitives/host/b3LauncherCL.h"
|
||||
#include "Bullet3Common/b3Quickprof.h"
|
||||
#include "basic_initialize/b3OpenCLUtils.h"
|
||||
|
||||
@@ -29,9 +29,9 @@ m_currentBuffer(-1)
|
||||
cl_int errNum=0;
|
||||
|
||||
cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"","opencl/gpu_broadphase/kernels/sap.cl");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
cl_program sapFastProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"","opencl/gpu_broadphase/kernels/sapFast.cl");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
|
||||
//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
|
||||
@@ -40,19 +40,19 @@ m_currentBuffer(-1)
|
||||
|
||||
|
||||
m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg );
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
#if 0
|
||||
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
#else
|
||||
#ifndef __APPLE__
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsKernel",&errNum,sapFastProg );
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
#else
|
||||
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -62,7 +62,7 @@ m_currentBuffer(-1)
|
||||
|
||||
m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "scatterKernel",&errNum,sapProg );
|
||||
|
||||
m_sorter = new btRadixSort32CL(m_context,m_device,m_queue);
|
||||
m_sorter = new b3RadixSort32CL(m_context,m_device,m_queue);
|
||||
}
|
||||
|
||||
b3GpuSapBroadphase::~b3GpuSapBroadphase()
|
||||
@@ -125,7 +125,7 @@ void b3GpuSapBroadphase::init3dSap()
|
||||
}
|
||||
void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
{
|
||||
btAssert(m_currentBuffer>=0);
|
||||
b3Assert(m_currentBuffer>=0);
|
||||
if (m_currentBuffer<0)
|
||||
return;
|
||||
|
||||
@@ -135,7 +135,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
{
|
||||
for (int buf=0;buf<2;buf++)
|
||||
{
|
||||
btAssert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size());
|
||||
b3Assert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -163,7 +163,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost()
|
||||
|
||||
int axis=0;
|
||||
|
||||
btAssert(m_allAabbsCPU.size() == m_allAabbsGPU.size());
|
||||
b3Assert(m_allAabbsCPU.size() == m_allAabbsGPU.size());
|
||||
|
||||
|
||||
|
||||
@@ -192,7 +192,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost()
|
||||
}
|
||||
}
|
||||
|
||||
b3AlignedObjectArray<btInt2> hostPairs;
|
||||
b3AlignedObjectArray<b3Int2> hostPairs;
|
||||
|
||||
{
|
||||
int numSmallAabbs = m_smallAabbsCPU.size();
|
||||
@@ -205,7 +205,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost()
|
||||
if (TestAabbAgainstAabb2((b3Vector3&)m_smallAabbsCPU[i].m_min, (b3Vector3&)m_smallAabbsCPU[i].m_max,
|
||||
(b3Vector3&)m_smallAabbsCPU[j].m_min,(b3Vector3&)m_smallAabbsCPU[j].m_max))
|
||||
{
|
||||
btInt2 pair;
|
||||
b3Int2 pair;
|
||||
pair.x = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array
|
||||
pair.y = m_smallAabbsCPU[j].m_minIndices[3];
|
||||
hostPairs.push_back(pair);
|
||||
@@ -227,7 +227,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost()
|
||||
if (TestAabbAgainstAabb2((b3Vector3&)m_smallAabbsCPU[i].m_min, (b3Vector3&)m_smallAabbsCPU[i].m_max,
|
||||
(b3Vector3&)m_largeAabbsCPU[j].m_min,(b3Vector3&)m_largeAabbsCPU[j].m_max))
|
||||
{
|
||||
btInt2 pair;
|
||||
b3Int2 pair;
|
||||
pair.x = m_largeAabbsCPU[j].m_minIndices[3];
|
||||
pair.y = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array
|
||||
hostPairs.push_back(pair);
|
||||
@@ -261,7 +261,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
|
||||
if (syncOnHost)
|
||||
{
|
||||
BT_PROFILE("Synchronize m_smallAabbsGPU (CPU/slow)");
|
||||
B3_PROFILE("Synchronize m_smallAabbsGPU (CPU/slow)");
|
||||
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
|
||||
@@ -284,14 +284,14 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
int numSmallAabbs = m_smallAabbsGPU.size();
|
||||
if (numSmallAabbs)
|
||||
{
|
||||
BT_PROFILE("copyAabbsKernelSmall");
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( m_allAabbsGPU.getBufferCL(), true ),
|
||||
btBufferInfoCL( m_smallAabbsGPU.getBufferCL()),
|
||||
B3_PROFILE("copyAabbsKernelSmall");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ),
|
||||
b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()),
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_copyAabbsKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_copyAabbsKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numSmallAabbs );
|
||||
int num = numSmallAabbs;
|
||||
launcher.launch1D( num);
|
||||
@@ -302,7 +302,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
|
||||
if (syncOnHost)
|
||||
{
|
||||
BT_PROFILE("Synchronize m_largeAabbsGPU (CPU/slow)");
|
||||
B3_PROFILE("Synchronize m_largeAabbsGPU (CPU/slow)");
|
||||
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
|
||||
@@ -325,14 +325,14 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
|
||||
if (numLargeAabbs)
|
||||
{
|
||||
BT_PROFILE("copyAabbsKernelLarge");
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( m_allAabbsGPU.getBufferCL(), true ),
|
||||
btBufferInfoCL( m_largeAabbsGPU.getBufferCL()),
|
||||
B3_PROFILE("copyAabbsKernelLarge");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ),
|
||||
b3BufferInfoCL( m_largeAabbsGPU.getBufferCL()),
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_copyAabbsKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_copyAabbsKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numLargeAabbs );
|
||||
int num = numLargeAabbs;
|
||||
launcher.launch1D( num);
|
||||
@@ -343,7 +343,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
|
||||
|
||||
|
||||
BT_PROFILE("GPU SAP");
|
||||
B3_PROFILE("GPU SAP");
|
||||
|
||||
int numSmallAabbs = m_smallAabbsGPU.size();
|
||||
m_gpuSmallSortData.resize(numSmallAabbs);
|
||||
@@ -352,10 +352,10 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
#if 1
|
||||
if (m_smallAabbsGPU.size())
|
||||
{
|
||||
BT_PROFILE("flipFloatKernel");
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), btBufferInfoCL( m_gpuSmallSortData.getBufferCL())};
|
||||
btLauncherCL launcher(m_queue, m_flipFloatKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
B3_PROFILE("flipFloatKernel");
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), b3BufferInfoCL( m_gpuSmallSortData.getBufferCL())};
|
||||
b3LauncherCL launcher(m_queue, m_flipFloatKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numSmallAabbs );
|
||||
launcher.setConst( axis );
|
||||
|
||||
@@ -365,7 +365,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("gpu radix sort\n");
|
||||
B3_PROFILE("gpu radix sort\n");
|
||||
m_sorter->execute(m_gpuSmallSortData);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
@@ -373,10 +373,10 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
m_gpuSmallSortedAabbs.resize(numSmallAabbs);
|
||||
if (numSmallAabbs)
|
||||
{
|
||||
BT_PROFILE("scatterKernel");
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), btBufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),btBufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())};
|
||||
btLauncherCL launcher(m_queue, m_scatterKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
B3_PROFILE("scatterKernel");
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), b3BufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())};
|
||||
b3LauncherCL launcher(m_queue, m_scatterKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numSmallAabbs);
|
||||
int num = numSmallAabbs;
|
||||
launcher.launch1D( num);
|
||||
@@ -389,7 +389,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
int maxPairs = maxPairsPerBody * numSmallAabbs;//todo
|
||||
m_overlappingPairs.resize(maxPairs);
|
||||
|
||||
btOpenCLArray<int> pairCount(m_context, m_queue);
|
||||
b3OpenCLArray<int> pairCount(m_context, m_queue);
|
||||
pairCount.push_back(0);
|
||||
int numPairs=0;
|
||||
|
||||
@@ -397,10 +397,10 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
int numLargeAabbs = m_largeAabbsGPU.size();
|
||||
if (numLargeAabbs && numSmallAabbs)
|
||||
{
|
||||
BT_PROFILE("sap2Kernel");
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_largeAabbsGPU.getBufferCL() ),btBufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), btBufferInfoCL( m_overlappingPairs.getBufferCL() ), btBufferInfoCL(pairCount.getBufferCL())};
|
||||
btLauncherCL launcher(m_queue, m_sap2Kernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
B3_PROFILE("sap2Kernel");
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_largeAabbsGPU.getBufferCL() ),b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(pairCount.getBufferCL())};
|
||||
b3LauncherCL launcher(m_queue, m_sap2Kernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numLargeAabbs );
|
||||
launcher.setConst( numSmallAabbs);
|
||||
launcher.setConst( axis );
|
||||
@@ -416,10 +416,10 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
}
|
||||
if (m_gpuSmallSortedAabbs.size())
|
||||
{
|
||||
BT_PROFILE("sapKernel");
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), btBufferInfoCL( m_overlappingPairs.getBufferCL() ), btBufferInfoCL(pairCount.getBufferCL())};
|
||||
btLauncherCL launcher(m_queue, m_sapKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
B3_PROFILE("sapKernel");
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(pairCount.getBufferCL())};
|
||||
b3LauncherCL launcher(m_queue, m_sapKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numSmallAabbs );
|
||||
launcher.setConst( axis );
|
||||
launcher.setConst( maxPairs );
|
||||
@@ -461,7 +461,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
int numPairs = 0;
|
||||
|
||||
|
||||
btLauncherCL launcher(m_queue, m_sapKernel);
|
||||
b3LauncherCL launcher(m_queue, m_sapKernel);
|
||||
|
||||
const char* fileName = "m_sapKernelArgs.bin";
|
||||
FILE* f = fopen(fileName,"rb");
|
||||
@@ -480,13 +480,13 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
int num = *(int*)&buf[serializedBytes];
|
||||
launcher.launch1D( num);
|
||||
|
||||
btOpenCLArray<int> pairCount(m_context, m_queue);
|
||||
b3OpenCLArray<int> pairCount(m_context, m_queue);
|
||||
int numElements = launcher.m_arrays[2]->size()/sizeof(int);
|
||||
pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(),numElements);
|
||||
numPairs = pairCount.at(0);
|
||||
//printf("overlapping pairs = %d\n",numPairs);
|
||||
b3AlignedObjectArray<btInt2> hostOoverlappingPairs;
|
||||
btOpenCLArray<btInt2> tmpGpuPairs(m_context,m_queue);
|
||||
b3AlignedObjectArray<b3Int2> hostOoverlappingPairs;
|
||||
b3OpenCLArray<b3Int2> tmpGpuPairs(m_context,m_queue);
|
||||
tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(),numPairs );
|
||||
|
||||
tmpGpuPairs.copyToHost(hostOoverlappingPairs);
|
||||
@@ -507,7 +507,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
|
||||
|
||||
m_overlappingPairs.resize(numPairs);
|
||||
|
||||
}//BT_PROFILE("GPU_RADIX SORT");
|
||||
}//B3_PROFILE("GPU_RADIX SORT");
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
#ifndef BT_GPU_SAP_BROADPHASE_H
|
||||
#define BT_GPU_SAP_BROADPHASE_H
|
||||
#ifndef B3_GPU_SAP_BROADPHASE_H
|
||||
#define B3_GPU_SAP_BROADPHASE_H
|
||||
|
||||
#include "parallel_primitives/host/btOpenCLArray.h"
|
||||
#include "parallel_primitives/host/btFillCL.h" //btInt2
|
||||
#include "parallel_primitives/host/b3OpenCLArray.h"
|
||||
#include "parallel_primitives/host/b3FillCL.h" //b3Int2
|
||||
class b3Vector3;
|
||||
#include "parallel_primitives/host/btRadixSort32CL.h"
|
||||
#include "parallel_primitives/host/b3RadixSort32CL.h"
|
||||
|
||||
#include "b3SapAabb.h"
|
||||
|
||||
@@ -22,28 +22,28 @@ class b3GpuSapBroadphase
|
||||
cl_kernel m_sapKernel;
|
||||
cl_kernel m_sap2Kernel;
|
||||
|
||||
class btRadixSort32CL* m_sorter;
|
||||
class b3RadixSort32CL* m_sorter;
|
||||
|
||||
///test for 3d SAP
|
||||
b3AlignedObjectArray<btSortData> m_sortedAxisCPU[3][2];
|
||||
b3AlignedObjectArray<b3SortData> m_sortedAxisCPU[3][2];
|
||||
int m_currentBuffer;
|
||||
|
||||
public:
|
||||
|
||||
btOpenCLArray<b3SapAabb> m_allAabbsGPU;
|
||||
b3OpenCLArray<b3SapAabb> m_allAabbsGPU;
|
||||
b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU;
|
||||
|
||||
btOpenCLArray<b3SapAabb> m_smallAabbsGPU;
|
||||
b3OpenCLArray<b3SapAabb> m_smallAabbsGPU;
|
||||
b3AlignedObjectArray<b3SapAabb> m_smallAabbsCPU;
|
||||
|
||||
btOpenCLArray<b3SapAabb> m_largeAabbsGPU;
|
||||
b3OpenCLArray<b3SapAabb> m_largeAabbsGPU;
|
||||
b3AlignedObjectArray<b3SapAabb> m_largeAabbsCPU;
|
||||
|
||||
btOpenCLArray<btInt2> m_overlappingPairs;
|
||||
b3OpenCLArray<b3Int2> m_overlappingPairs;
|
||||
|
||||
//temporary gpu work memory
|
||||
btOpenCLArray<btSortData> m_gpuSmallSortData;
|
||||
btOpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs;
|
||||
b3OpenCLArray<b3SortData> m_gpuSmallSortData;
|
||||
b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs;
|
||||
|
||||
|
||||
b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q );
|
||||
@@ -66,4 +66,4 @@ class b3GpuSapBroadphase
|
||||
cl_mem getOverlappingPairBuffer();
|
||||
};
|
||||
|
||||
#endif //BT_GPU_SAP_BROADPHASE_H
|
||||
#endif //B3_GPU_SAP_BROADPHASE_H
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BT_SAP_AABB_H
|
||||
#define BT_SAP_AABB_H
|
||||
#ifndef B3_SAP_AABB_H
|
||||
#define B3_SAP_AABB_H
|
||||
|
||||
struct b3SapAabb
|
||||
{
|
||||
@@ -15,4 +15,4 @@ struct b3SapAabb
|
||||
};
|
||||
};
|
||||
|
||||
#endif //BT_SAP_AABB_H
|
||||
#endif //B3_SAP_AABB_H
|
||||
|
||||
@@ -30,12 +30,12 @@ static const char* sapFastCL= \
|
||||
" float m_maxElems[4];\n"
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} btAabbCL;\n"
|
||||
"} b3AabbCL;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"/// conservative test for overlap between two aabbs\n"
|
||||
"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n"
|
||||
"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, __local const b3AabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, __local const b3AabbCL* aabb2)\n"
|
||||
"{\n"
|
||||
"//skip pairs between static (mass=0) objects\n"
|
||||
" if ((aabb1->m_maxIndices[3]==0) && (aabb2->m_maxIndices[3] == 0))\n"
|
||||
@@ -50,18 +50,18 @@ static const char* sapFastCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"//computePairsKernelBatchWrite\n"
|
||||
"__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernel( __global const b3AabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int localId = get_local_id(0);\n"
|
||||
"\n"
|
||||
" __local int numActiveWgItems[1];\n"
|
||||
" __local int breakRequest[1];\n"
|
||||
" __local btAabbCL localAabbs[128];// = aabbs[i];\n"
|
||||
" __local b3AabbCL localAabbs[128];// = aabbs[i];\n"
|
||||
" \n"
|
||||
" int2 myPairs[64];\n"
|
||||
" \n"
|
||||
" btAabbCL myAabb;\n"
|
||||
" b3AabbCL myAabb;\n"
|
||||
" \n"
|
||||
" myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n"
|
||||
" float testValue = myAabb.m_maxElems[axis];\n"
|
||||
|
||||
@@ -30,12 +30,12 @@ static const char* sapCL= \
|
||||
" float m_maxElems[4];\n"
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} btAabbCL;\n"
|
||||
"} b3AabbCL;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"/// conservative test for overlap between two aabbs\n"
|
||||
"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n"
|
||||
"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, __local const b3AabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, __local const b3AabbCL* aabb2)\n"
|
||||
"{\n"
|
||||
" bool overlap = true;\n"
|
||||
" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
|
||||
@@ -43,8 +43,8 @@ static const char* sapCL= \
|
||||
" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
|
||||
" return overlap;\n"
|
||||
"}\n"
|
||||
"bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n"
|
||||
"bool TestAabbAgainstAabb2GlobalGlobal(__global const b3AabbCL* aabb1, __global const b3AabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2GlobalGlobal(__global const b3AabbCL* aabb1, __global const b3AabbCL* aabb2)\n"
|
||||
"{\n"
|
||||
" bool overlap = true;\n"
|
||||
" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
|
||||
@@ -53,8 +53,8 @@ static const char* sapCL= \
|
||||
" return overlap;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n"
|
||||
"bool TestAabbAgainstAabb2Global(const b3AabbCL* aabb1, __global const b3AabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2Global(const b3AabbCL* aabb1, __global const b3AabbCL* aabb2)\n"
|
||||
"{\n"
|
||||
" bool overlap = true;\n"
|
||||
" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
|
||||
@@ -64,7 +64,7 @@ static const char* sapCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernelTwoArrays( __global const b3AabbCL* unsortedAabbs, __global const b3AabbCL* sortedAabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numUnsortedAabbs)\n"
|
||||
@@ -89,7 +89,7 @@ static const char* sapCL= \
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernelOriginal( __global const b3AabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numObjects)\n"
|
||||
@@ -117,7 +117,7 @@ static const char* sapCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernelBarrier( __global const b3AabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int localId = get_local_id(0);\n"
|
||||
@@ -181,16 +181,16 @@ static const char* sapCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernelLocalSharedMemory( __global const b3AabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int localId = get_local_id(0);\n"
|
||||
"\n"
|
||||
" __local int numActiveWgItems[1];\n"
|
||||
" __local int breakRequest[1];\n"
|
||||
" __local btAabbCL localAabbs[128];// = aabbs[i];\n"
|
||||
" __local b3AabbCL localAabbs[128];// = aabbs[i];\n"
|
||||
" \n"
|
||||
" btAabbCL myAabb;\n"
|
||||
" b3AabbCL myAabb;\n"
|
||||
" \n"
|
||||
" myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n"
|
||||
" float testValue = myAabb.m_maxElems[axis];\n"
|
||||
@@ -289,7 +289,7 @@ static const char* sapCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n"
|
||||
"__kernel void copyAabbsKernel( __global const b3AabbCL* allAabbs, __global b3AabbCL* destAabbs, int numObjects)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numObjects)\n"
|
||||
@@ -300,7 +300,7 @@ static const char* sapCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void flipFloatKernel( __global const btAabbCL* aabbs, volatile __global int2* sortData, int numObjects, int axis)\n"
|
||||
"__kernel void flipFloatKernel( __global const b3AabbCL* aabbs, volatile __global int2* sortData, int numObjects, int axis)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numObjects)\n"
|
||||
@@ -312,7 +312,7 @@ static const char* sapCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void scatterKernel( __global const btAabbCL* aabbs, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n"
|
||||
"__kernel void scatterKernel( __global const b3AabbCL* aabbs, volatile __global const int2* sortData, __global b3AabbCL* sortedAabbs, int numObjects)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numObjects)\n"
|
||||
|
||||
@@ -17,10 +17,10 @@ subject to the following restrictions:
|
||||
#include "../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "../host/b3GpuSapBroadphase.h"
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
#include "parallel_primitives/host/btFillCL.h"
|
||||
#include "parallel_primitives/host/btBoundSearchCL.h"
|
||||
#include "parallel_primitives/host/btRadixSort32CL.h"
|
||||
#include "parallel_primitives/host/btPrefixScanCL.h"
|
||||
#include "parallel_primitives/host/b3FillCL.h"
|
||||
#include "parallel_primitives/host/b3BoundSearchCL.h"
|
||||
#include "parallel_primitives/host/b3RadixSort32CL.h"
|
||||
#include "parallel_primitives/host/b3PrefixScanCL.h"
|
||||
#include "Bullet3Common/b3CommandLineArgs.h"
|
||||
#include "Bullet3Common/b3MinMax.h"
|
||||
|
||||
@@ -52,7 +52,7 @@ void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
|
||||
int numDev = b3OpenCLUtils::getNumDevices(g_context);
|
||||
if (numDev>0)
|
||||
{
|
||||
btOpenCLDeviceInfo info;
|
||||
b3OpenCLDeviceInfo info;
|
||||
g_device= b3OpenCLUtils::getDevice(g_context,0);
|
||||
g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
|
||||
#ifndef BT_COLLIDABLE_H
|
||||
#define BT_COLLIDABLE_H
|
||||
#ifndef B3_COLLIDABLE_H
|
||||
#define B3_COLLIDABLE_H
|
||||
|
||||
enum btShapeTypes
|
||||
enum b3ShapeTypes
|
||||
{
|
||||
SHAPE_HEIGHT_FIELD=1,
|
||||
|
||||
@@ -25,14 +25,14 @@ struct b3Collidable
|
||||
int m_shapeIndex;
|
||||
};
|
||||
|
||||
struct btCollidableNew
|
||||
struct b3CollidableNew
|
||||
{
|
||||
short int m_shapeType;
|
||||
short int m_numShapes;
|
||||
int m_shapeIndex;
|
||||
};
|
||||
|
||||
struct btGpuChildShape
|
||||
struct b3GpuChildShape
|
||||
{
|
||||
float m_childPosition[4];
|
||||
float m_childOrientation[4];
|
||||
@@ -42,7 +42,7 @@ struct btGpuChildShape
|
||||
int m_unused2;
|
||||
};
|
||||
|
||||
struct btCompoundOverlappingPair
|
||||
struct b3CompoundOverlappingPair
|
||||
{
|
||||
int m_bodyIndexA;
|
||||
int m_bodyIndexB;
|
||||
@@ -50,4 +50,4 @@ struct btCompoundOverlappingPair
|
||||
int m_childShapeIndexA;
|
||||
int m_childShapeIndexB;
|
||||
};
|
||||
#endif //BT_COLLIDABLE_H
|
||||
#endif //B3_COLLIDABLE_H
|
||||
|
||||
@@ -18,19 +18,19 @@ subject to the following restrictions:
|
||||
///Separating axis test based on work from Pierre Terdiman, see
|
||||
///And contact clipping based on work from Simon Hobbs
|
||||
|
||||
//#define BT_DEBUG_SAT_FACE
|
||||
//#define B3_DEBUG_SAT_FACE
|
||||
|
||||
#include "b3ConvexHullContact.h"
|
||||
#include <string.h>//memcpy
|
||||
#include "b3ConvexPolyhedronCL.h"
|
||||
|
||||
|
||||
typedef b3AlignedObjectArray<b3Vector3> btVertexArray;
|
||||
typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;
|
||||
#include "Bullet3Common/b3Quickprof.h"
|
||||
|
||||
#include <float.h> //for FLT_MAX
|
||||
#include "basic_initialize/b3OpenCLUtils.h"
|
||||
#include "parallel_primitives/host/btLauncherCL.h"
|
||||
#include "parallel_primitives/host/b3LauncherCL.h"
|
||||
//#include "AdlQuaternion.h"
|
||||
|
||||
#include "../kernels/satKernels.h"
|
||||
@@ -42,7 +42,7 @@ typedef b3AlignedObjectArray<b3Vector3> btVertexArray;
|
||||
#include "Bullet3Geometry/b3AabbUtil.h"
|
||||
|
||||
|
||||
#define dot3F4 btDot
|
||||
#define dot3F4 b3Dot
|
||||
|
||||
GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q )
|
||||
:m_context(ctx),
|
||||
@@ -65,22 +65,22 @@ m_totalContactsOut(m_context, m_queue)
|
||||
//#endif
|
||||
|
||||
cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,src,&errNum,flags,"opencl/gpu_narrowphase/kernels/sat.cl");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findSeparatingAxisKernel",&errNum,satProg );
|
||||
btAssert(m_findSeparatingAxisKernel);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(m_findSeparatingAxisKernel);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findConcaveSeparatingAxisKernel",&errNum,satProg );
|
||||
btAssert(m_findConcaveSeparatingAxisKernel);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(m_findConcaveSeparatingAxisKernel);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findCompoundPairsKernel",&errNum,satProg );
|
||||
btAssert(m_findCompoundPairsKernel);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(m_findCompoundPairsKernel);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "processCompoundPairsKernel",&errNum,satProg );
|
||||
btAssert(m_processCompoundPairsKernel);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(m_processCompoundPairsKernel);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
}
|
||||
|
||||
if (1)
|
||||
@@ -93,30 +93,30 @@ m_totalContactsOut(m_context, m_queue)
|
||||
//#endif
|
||||
|
||||
cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,"opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
|
||||
m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_clipFacesAndContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndContactReductionKernel",&errNum,satClipContactsProg);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip,
|
||||
"newContactReductionKernel",&errNum,satClipContactsProg);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -133,28 +133,28 @@ m_totalContactsOut(m_context, m_queue)
|
||||
{
|
||||
const char* srcBvh = bvhTraversalKernelCL;
|
||||
cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"","opencl/gpu_narrowphase/kernels/bvhTraversal.cl");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,"");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
const char* primitiveContactsSrc = primitiveContactsKernelsCL;
|
||||
cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"","opencl/gpu_narrowphase/kernels/primitiveContacts.cl");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,"");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg );
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
btAssert(m_findConcaveSphereContactsKernel);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
b3Assert(m_findConcaveSphereContactsKernel);
|
||||
|
||||
m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,"");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
btAssert(m_processCompoundPairsPrimitivesKernel);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
b3Assert(m_processCompoundPairsPrimitivesKernel);
|
||||
|
||||
}
|
||||
|
||||
@@ -207,7 +207,7 @@ GpuSatCollision::~GpuSatCollision()
|
||||
|
||||
}
|
||||
|
||||
struct MyTriangleCallback : public btNodeOverlapCallback
|
||||
struct MyTriangleCallback : public b3NodeOverlapCallback
|
||||
{
|
||||
int m_bodyIndexA;
|
||||
int m_bodyIndexB;
|
||||
@@ -221,7 +221,7 @@ struct MyTriangleCallback : public btNodeOverlapCallback
|
||||
|
||||
|
||||
#define float4 b3Vector3
|
||||
#define make_float4(x,y,z,w) btVector4(x,y,z,w)
|
||||
#define make_float4(x,y,z,w) b3Vector4(x,y,z,w)
|
||||
|
||||
float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace)
|
||||
{
|
||||
@@ -248,7 +248,7 @@ b3Vector3 transform(b3Vector3* v, const b3Vector3* pos, const b3Vector3* orn)
|
||||
|
||||
|
||||
inline bool IsPointInPolygon(const float4& p,
|
||||
const btGpuFace* face,
|
||||
const b3GpuFace* face,
|
||||
const float4* baseVertex,
|
||||
const int* convexIndices,
|
||||
float4* out)
|
||||
@@ -277,10 +277,10 @@ inline bool IsPointInPolygon(const float4& p,
|
||||
ap = p-a;
|
||||
v = cross3(ab,plane);
|
||||
|
||||
if (btDot(ap, v) > 0.f)
|
||||
if (b3Dot(ap, v) > 0.f)
|
||||
{
|
||||
float ab_m2 = btDot(ab, ab);
|
||||
float rt = ab_m2 != 0.f ? btDot(ab, ap) / ab_m2 : 0.f;
|
||||
float ab_m2 = b3Dot(ab, ab);
|
||||
float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f;
|
||||
if (rt <= 0.f)
|
||||
{
|
||||
*out = a;
|
||||
@@ -305,7 +305,7 @@ inline bool IsPointInPolygon(const float4& p,
|
||||
#define normalize3(a) (a.normalize())
|
||||
|
||||
|
||||
int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4& nearNormal, btInt4* contactIdx)
|
||||
int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx)
|
||||
{
|
||||
if( nPoints == 0 )
|
||||
return 0;
|
||||
@@ -414,7 +414,7 @@ void computeContactPlaneConvex(int pairIndex,
|
||||
const b3ConvexPolyhedronCL* convexShapes,
|
||||
const b3Vector3* convexVertices,
|
||||
const int* convexIndices,
|
||||
const btGpuFace* faces,
|
||||
const b3GpuFace* faces,
|
||||
b3Contact4* globalContactsOut,
|
||||
int& nGlobalContactsOut,
|
||||
int maxContactCapacity)
|
||||
@@ -459,7 +459,7 @@ void computeContactPlaneConvex(int pairIndex,
|
||||
b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];
|
||||
int numPoints = 0;
|
||||
|
||||
btInt4 contactIdx;
|
||||
b3Int4 contactIdx;
|
||||
contactIdx.s[0] = 0;
|
||||
contactIdx.s[1] = 1;
|
||||
contactIdx.s[2] = 2;
|
||||
@@ -547,14 +547,14 @@ void computeContactPlaneCompound(int pairIndex,
|
||||
const b3ConvexPolyhedronCL* convexShapes,
|
||||
const b3Vector3* convexVertices,
|
||||
const int* convexIndices,
|
||||
const btGpuFace* faces,
|
||||
const b3GpuFace* faces,
|
||||
b3Contact4* globalContactsOut,
|
||||
int& nGlobalContactsOut,
|
||||
int maxContactCapacity)
|
||||
{
|
||||
|
||||
int shapeTypeB = collidables[collidableIndexB].m_shapeType;
|
||||
btAssert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
|
||||
b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
|
||||
|
||||
|
||||
|
||||
@@ -597,7 +597,7 @@ void computeContactPlaneCompound(int pairIndex,
|
||||
b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];
|
||||
int numPoints = 0;
|
||||
|
||||
btInt4 contactIdx;
|
||||
b3Int4 contactIdx;
|
||||
contactIdx.s[0] = 0;
|
||||
contactIdx.s[1] = 1;
|
||||
contactIdx.s[2] = 2;
|
||||
@@ -686,7 +686,7 @@ void computeContactSphereConvex(int pairIndex,
|
||||
const b3ConvexPolyhedronCL* convexShapes,
|
||||
const b3Vector3* convexVertices,
|
||||
const int* convexIndices,
|
||||
const btGpuFace* faces,
|
||||
const b3GpuFace* faces,
|
||||
b3Contact4* globalContactsOut,
|
||||
int& nGlobalContactsOut,
|
||||
int maxContactCapacity)
|
||||
@@ -722,7 +722,7 @@ void computeContactSphereConvex(int pairIndex,
|
||||
float4 localHitNormal;
|
||||
for ( int f = 0; f < numFaces; f++ )
|
||||
{
|
||||
btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];
|
||||
b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];
|
||||
float4 planeEqn;
|
||||
float4 localPlaneNormal = make_float4(face.m_plane.getX(),face.m_plane.getY(),face.m_plane.getZ(),0.f);
|
||||
float4 n1 = localPlaneNormal;//quatRotate(quat,localPlaneNormal);
|
||||
@@ -763,7 +763,7 @@ void computeContactSphereConvex(int pairIndex,
|
||||
b3Scalar l2 = tmp.length2();
|
||||
if (l2<radius*radius)
|
||||
{
|
||||
dist = btSqrt(l2);
|
||||
dist = b3Sqrt(l2);
|
||||
if (dist>minDist)
|
||||
{
|
||||
minDist = dist;
|
||||
@@ -833,32 +833,32 @@ void computeContactSphereConvex(int pairIndex,
|
||||
}
|
||||
|
||||
|
||||
void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btInt2>* pairs, int nPairs,
|
||||
const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
btOpenCLArray<b3Contact4>* contactOut, int& nContacts,
|
||||
void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArray<b3Int2>* pairs, int nPairs,
|
||||
const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
|
||||
int maxContactCapacity,
|
||||
const btOpenCLArray<b3ConvexPolyhedronCL>& convexData,
|
||||
const btOpenCLArray<b3Vector3>& gpuVertices,
|
||||
const btOpenCLArray<b3Vector3>& gpuUniqueEdges,
|
||||
const btOpenCLArray<btGpuFace>& gpuFaces,
|
||||
const btOpenCLArray<int>& gpuIndices,
|
||||
const btOpenCLArray<b3Collidable>& gpuCollidables,
|
||||
const btOpenCLArray<btGpuChildShape>& gpuChildShapes,
|
||||
const b3OpenCLArray<b3ConvexPolyhedronCL>& convexData,
|
||||
const b3OpenCLArray<b3Vector3>& gpuVertices,
|
||||
const b3OpenCLArray<b3Vector3>& gpuUniqueEdges,
|
||||
const b3OpenCLArray<b3GpuFace>& gpuFaces,
|
||||
const b3OpenCLArray<int>& gpuIndices,
|
||||
const b3OpenCLArray<b3Collidable>& gpuCollidables,
|
||||
const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes,
|
||||
|
||||
const btOpenCLArray<btYetAnotherAabb>& clAabbsWS,
|
||||
btOpenCLArray<b3Vector3>& worldVertsB1GPU,
|
||||
btOpenCLArray<btInt4>& clippingFacesOutGPU,
|
||||
btOpenCLArray<b3Vector3>& worldNormalsAGPU,
|
||||
btOpenCLArray<b3Vector3>& worldVertsA1GPU,
|
||||
btOpenCLArray<b3Vector3>& worldVertsB2GPU,
|
||||
const b3OpenCLArray<b3YetAnotherAabb>& clAabbsWS,
|
||||
b3OpenCLArray<b3Vector3>& worldVertsB1GPU,
|
||||
b3OpenCLArray<b3Int4>& clippingFacesOutGPU,
|
||||
b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
|
||||
b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
|
||||
b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
|
||||
b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData,
|
||||
btOpenCLArray<btQuantizedBvhNode>* treeNodesGPU,
|
||||
btOpenCLArray<btBvhSubtreeInfo>* subTreesGPU,
|
||||
btOpenCLArray<b3BvhInfo>* bvhInfo,
|
||||
b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU,
|
||||
b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU,
|
||||
b3OpenCLArray<b3BvhInfo>* bvhInfo,
|
||||
|
||||
int numObjects,
|
||||
int maxTriConvexPairCapacity,
|
||||
btOpenCLArray<btInt4>& triangleConvexPairsOut,
|
||||
b3OpenCLArray<b3Int4>& triangleConvexPairsOut,
|
||||
int& numTriConvexPairsOut
|
||||
)
|
||||
{
|
||||
@@ -868,9 +868,9 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
|
||||
//#define CHECK_ON_HOST
|
||||
#ifdef CHECK_ON_HOST
|
||||
b3AlignedObjectArray<btYetAnotherAabb> hostAabbs;
|
||||
b3AlignedObjectArray<b3YetAnotherAabb> hostAabbs;
|
||||
clAabbsWS.copyToHost(hostAabbs);
|
||||
b3AlignedObjectArray<btInt2> hostPairs;
|
||||
b3AlignedObjectArray<b3Int2> hostPairs;
|
||||
pairs->copyToHost(hostPairs);
|
||||
|
||||
b3AlignedObjectArray<b3RigidBodyCL> hostBodyBuf;
|
||||
@@ -886,18 +886,18 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
|
||||
b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
|
||||
gpuUniqueEdges.copyToHost(hostUniqueEdges);
|
||||
b3AlignedObjectArray<btGpuFace> hostFaces;
|
||||
b3AlignedObjectArray<b3GpuFace> hostFaces;
|
||||
gpuFaces.copyToHost(hostFaces);
|
||||
b3AlignedObjectArray<int> hostIndices;
|
||||
gpuIndices.copyToHost(hostIndices);
|
||||
b3AlignedObjectArray<b3Collidable> hostCollidables;
|
||||
gpuCollidables.copyToHost(hostCollidables);
|
||||
|
||||
b3AlignedObjectArray<btGpuChildShape> cpuChildShapes;
|
||||
b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
|
||||
gpuChildShapes.copyToHost(cpuChildShapes);
|
||||
|
||||
|
||||
b3AlignedObjectArray<btInt4> hostTriangleConvexPairs;
|
||||
b3AlignedObjectArray<b3Int4> hostTriangleConvexPairs;
|
||||
|
||||
b3AlignedObjectArray<b3Contact4> hostContacts;
|
||||
if (nContacts)
|
||||
@@ -984,22 +984,22 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
{
|
||||
m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true);
|
||||
|
||||
BT_PROFILE("primitiveContactsKernel");
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( pairs->getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( contactOut->getBufferCL()),
|
||||
btBufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
B3_PROFILE("primitiveContactsKernel");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( pairs->getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( contactOut->getBufferCL()),
|
||||
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_primitiveContactsKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_primitiveContactsKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nPairs );
|
||||
launcher.setConst(maxContactCapacity);
|
||||
int num = nPairs;
|
||||
@@ -1013,33 +1013,33 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
|
||||
#endif//CHECK_ON_HOST
|
||||
|
||||
BT_PROFILE("computeConvexConvexContactsGPUSAT");
|
||||
B3_PROFILE("computeConvexConvexContactsGPUSAT");
|
||||
// printf("nContacts = %d\n",nContacts);
|
||||
|
||||
btOpenCLArray<b3Vector3> sepNormals(m_context,m_queue);
|
||||
b3OpenCLArray<b3Vector3> sepNormals(m_context,m_queue);
|
||||
sepNormals.resize(nPairs);
|
||||
btOpenCLArray<int> hasSeparatingNormals(m_context,m_queue);
|
||||
b3OpenCLArray<int> hasSeparatingNormals(m_context,m_queue);
|
||||
hasSeparatingNormals.resize(nPairs);
|
||||
|
||||
int concaveCapacity=maxTriConvexPairCapacity;
|
||||
btOpenCLArray<b3Vector3> concaveSepNormals(m_context,m_queue);
|
||||
b3OpenCLArray<b3Vector3> concaveSepNormals(m_context,m_queue);
|
||||
concaveSepNormals.resize(concaveCapacity);
|
||||
|
||||
btOpenCLArray<int> numConcavePairsOut(m_context,m_queue);
|
||||
b3OpenCLArray<int> numConcavePairsOut(m_context,m_queue);
|
||||
numConcavePairsOut.push_back(0);
|
||||
|
||||
int compoundPairCapacity=65536*10;
|
||||
btOpenCLArray<btCompoundOverlappingPair> gpuCompoundPairs(m_context,m_queue);
|
||||
b3OpenCLArray<b3CompoundOverlappingPair> gpuCompoundPairs(m_context,m_queue);
|
||||
gpuCompoundPairs.resize(compoundPairCapacity);
|
||||
|
||||
btOpenCLArray<b3Vector3> gpuCompoundSepNormals(m_context,m_queue);
|
||||
b3OpenCLArray<b3Vector3> gpuCompoundSepNormals(m_context,m_queue);
|
||||
gpuCompoundSepNormals.resize(compoundPairCapacity);
|
||||
|
||||
|
||||
btOpenCLArray<int> gpuHasCompoundSepNormals(m_context,m_queue);
|
||||
b3OpenCLArray<int> gpuHasCompoundSepNormals(m_context,m_queue);
|
||||
gpuHasCompoundSepNormals.resize(compoundPairCapacity);
|
||||
|
||||
btOpenCLArray<int> numCompoundPairsOut(m_context,m_queue);
|
||||
b3OpenCLArray<int> numCompoundPairsOut(m_context,m_queue);
|
||||
numCompoundPairsOut.push_back(0);
|
||||
|
||||
int numCompoundPairs = 0;
|
||||
@@ -1053,23 +1053,23 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
{
|
||||
|
||||
{
|
||||
BT_PROFILE("findSeparatingAxisKernel");
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( pairs->getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
btBufferInfoCL( sepNormals.getBufferCL()),
|
||||
btBufferInfoCL( hasSeparatingNormals.getBufferCL())
|
||||
B3_PROFILE("findSeparatingAxisKernel");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( pairs->getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
b3BufferInfoCL( sepNormals.getBufferCL()),
|
||||
b3BufferInfoCL( hasSeparatingNormals.getBufferCL())
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_findSeparatingAxisKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nPairs );
|
||||
|
||||
int num = nPairs;
|
||||
@@ -1086,9 +1086,9 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
|
||||
if (treeNodesGPU->size() && treeNodesGPU->size())
|
||||
{
|
||||
BT_PROFILE("m_bvhTraversalKernel");
|
||||
B3_PROFILE("m_bvhTraversalKernel");
|
||||
numConcavePairs = numConcavePairsOut.at(0);
|
||||
btLauncherCL launcher(m_queue, m_bvhTraversalKernel);
|
||||
b3LauncherCL launcher(m_queue, m_bvhTraversalKernel);
|
||||
launcher.setBuffer( pairs->getBufferCL());
|
||||
launcher.setBuffer( bodyBuf->getBufferCL());
|
||||
launcher.setBuffer( gpuCollidables.getBufferCL());
|
||||
@@ -1117,23 +1117,23 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
if (numConcavePairs)
|
||||
{
|
||||
//now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut)
|
||||
BT_PROFILE("findConcaveSeparatingAxisKernel");
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( triangleConvexPairsOut.getBufferCL() ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
btBufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
btBufferInfoCL( concaveSepNormals.getBufferCL())
|
||||
B3_PROFILE("findConcaveSeparatingAxisKernel");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
b3BufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
b3BufferInfoCL( concaveSepNormals.getBufferCL())
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
|
||||
launcher.setConst( numConcavePairs );
|
||||
|
||||
@@ -1143,7 +1143,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
|
||||
// b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals;
|
||||
// concaveSepNormals.copyToHost(cpuCompoundSepNormals);
|
||||
// b3AlignedObjectArray<btInt4> cpuConcavePairs;
|
||||
// b3AlignedObjectArray<b3Int4> cpuConcavePairs;
|
||||
// triangleConvexPairsOut.copyToHost(cpuConcavePairs);
|
||||
|
||||
|
||||
@@ -1156,25 +1156,25 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
|
||||
if (1)
|
||||
{
|
||||
BT_PROFILE("findCompoundPairsKernel");
|
||||
btBufferInfoCL bInfo[] =
|
||||
B3_PROFILE("findCompoundPairsKernel");
|
||||
b3BufferInfoCL bInfo[] =
|
||||
{
|
||||
btBufferInfoCL( pairs->getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCompoundPairs.getBufferCL()),
|
||||
btBufferInfoCL( numCompoundPairsOut.getBufferCL())
|
||||
b3BufferInfoCL( pairs->getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCompoundPairs.getBufferCL()),
|
||||
b3BufferInfoCL( numCompoundPairsOut.getBufferCL())
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_findCompoundPairsKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nPairs );
|
||||
launcher.setConst( compoundPairCapacity);
|
||||
|
||||
@@ -1197,25 +1197,25 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
if (numCompoundPairs)
|
||||
{
|
||||
#ifndef CHECK_ON_HOST
|
||||
BT_PROFILE("processCompoundPairsPrimitivesKernel");
|
||||
btBufferInfoCL bInfo[] =
|
||||
B3_PROFILE("processCompoundPairsPrimitivesKernel");
|
||||
b3BufferInfoCL bInfo[] =
|
||||
{
|
||||
btBufferInfoCL( gpuCompoundPairs.getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
btBufferInfoCL( contactOut->getBufferCL()),
|
||||
btBufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
b3BufferInfoCL( gpuCompoundPairs.getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
b3BufferInfoCL( contactOut->getBufferCL()),
|
||||
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numCompoundPairs );
|
||||
launcher.setConst(maxContactCapacity);
|
||||
|
||||
@@ -1230,25 +1230,25 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
if (numCompoundPairs)
|
||||
{
|
||||
|
||||
BT_PROFILE("processCompoundPairsKernel");
|
||||
btBufferInfoCL bInfo[] =
|
||||
B3_PROFILE("processCompoundPairsKernel");
|
||||
b3BufferInfoCL bInfo[] =
|
||||
{
|
||||
btBufferInfoCL( gpuCompoundPairs.getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCompoundSepNormals.getBufferCL()),
|
||||
btBufferInfoCL( gpuHasCompoundSepNormals.getBufferCL())
|
||||
b3BufferInfoCL( gpuCompoundPairs.getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCompoundSepNormals.getBufferCL()),
|
||||
b3BufferInfoCL( gpuHasCompoundSepNormals.getBufferCL())
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_processCompoundPairsKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numCompoundPairs );
|
||||
|
||||
int num = numCompoundPairs;
|
||||
@@ -1277,24 +1277,24 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
{
|
||||
if (numConcavePairs)
|
||||
{
|
||||
BT_PROFILE("findConcaveSphereContactsKernel");
|
||||
B3_PROFILE("findConcaveSphereContactsKernel");
|
||||
nContacts = m_totalContactsOut.at(0);
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( triangleConvexPairsOut.getBufferCL() ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
btBufferInfoCL( contactOut->getBufferCL()),
|
||||
btBufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( clAabbsWS.getBufferCL(),true),
|
||||
b3BufferInfoCL( contactOut->getBufferCL()),
|
||||
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
|
||||
launcher.setConst( numConcavePairs );
|
||||
launcher.setConst(maxContactCapacity);
|
||||
@@ -1317,7 +1317,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
|
||||
if (contactClippingOnGpu)
|
||||
{
|
||||
//BT_PROFILE("clipHullHullKernel");
|
||||
//B3_PROFILE("clipHullHullKernel");
|
||||
|
||||
|
||||
m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true);
|
||||
@@ -1330,24 +1330,24 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
// nContacts = m_totalContactsOut.at(0);
|
||||
// printf("nContacts before = %d\n", nContacts);
|
||||
|
||||
BT_PROFILE("clipHullHullConcaveConvexKernel");
|
||||
B3_PROFILE("clipHullHullConcaveConvexKernel");
|
||||
nContacts = m_totalContactsOut.at(0);
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
btBufferInfoCL( concaveSepNormals.getBufferCL()),
|
||||
btBufferInfoCL( contactOut->getBufferCL()),
|
||||
btBufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
b3BufferInfoCL( concaveSepNormals.getBufferCL()),
|
||||
b3BufferInfoCL( contactOut->getBufferCL()),
|
||||
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
};
|
||||
btLauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numConcavePairs );
|
||||
int num = numConcavePairs;
|
||||
launcher.launch1D( num);
|
||||
@@ -1364,7 +1364,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
//convex-convex contact clipping
|
||||
if (1)
|
||||
{
|
||||
BT_PROFILE("clipHullHullKernel");
|
||||
B3_PROFILE("clipHullHullKernel");
|
||||
bool breakupKernel = false;
|
||||
|
||||
#ifdef __APPLE__
|
||||
@@ -1396,26 +1396,26 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("findClippingFacesKernel");
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( pairs->getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( sepNormals.getBufferCL()),
|
||||
btBufferInfoCL( hasSeparatingNormals.getBufferCL()),
|
||||
btBufferInfoCL( clippingFacesOutGPU.getBufferCL()),
|
||||
btBufferInfoCL( worldVertsA1GPU.getBufferCL()),
|
||||
btBufferInfoCL( worldNormalsAGPU.getBufferCL()),
|
||||
btBufferInfoCL( worldVertsB1GPU.getBufferCL())
|
||||
B3_PROFILE("findClippingFacesKernel");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( pairs->getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( sepNormals.getBufferCL()),
|
||||
b3BufferInfoCL( hasSeparatingNormals.getBufferCL()),
|
||||
b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()),
|
||||
b3BufferInfoCL( worldVertsA1GPU.getBufferCL()),
|
||||
b3BufferInfoCL( worldNormalsAGPU.getBufferCL()),
|
||||
b3BufferInfoCL( worldVertsB1GPU.getBufferCL())
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_findClippingFacesKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_findClippingFacesKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( vertexFaceCapacity);
|
||||
launcher.setConst( nPairs );
|
||||
int num = nPairs;
|
||||
@@ -1431,26 +1431,26 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
///clip face B against face A, reduce contacts and append them to a global contact array
|
||||
if (1)
|
||||
{
|
||||
BT_PROFILE("clipFacesAndContactReductionKernel");
|
||||
B3_PROFILE("clipFacesAndContactReductionKernel");
|
||||
//nContacts = m_totalContactsOut.at(0);
|
||||
//int h = hasSeparatingNormals.at(0);
|
||||
//int4 p = clippingFacesOutGPU.at(0);
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( pairs->getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( sepNormals.getBufferCL()),
|
||||
btBufferInfoCL( hasSeparatingNormals.getBufferCL()),
|
||||
btBufferInfoCL( contactOut->getBufferCL()),
|
||||
btBufferInfoCL( clippingFacesOutGPU.getBufferCL()),
|
||||
btBufferInfoCL( worldVertsA1GPU.getBufferCL()),
|
||||
btBufferInfoCL( worldNormalsAGPU.getBufferCL()),
|
||||
btBufferInfoCL( worldVertsB1GPU.getBufferCL()),
|
||||
btBufferInfoCL( worldVertsB2GPU.getBufferCL()),
|
||||
btBufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( pairs->getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( sepNormals.getBufferCL()),
|
||||
b3BufferInfoCL( hasSeparatingNormals.getBufferCL()),
|
||||
b3BufferInfoCL( contactOut->getBufferCL()),
|
||||
b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()),
|
||||
b3BufferInfoCL( worldVertsA1GPU.getBufferCL()),
|
||||
b3BufferInfoCL( worldNormalsAGPU.getBufferCL()),
|
||||
b3BufferInfoCL( worldVertsB1GPU.getBufferCL()),
|
||||
b3BufferInfoCL( worldVertsB2GPU.getBufferCL()),
|
||||
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_clipFacesAndContactReductionKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_clipFacesAndContactReductionKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(vertexFaceCapacity);
|
||||
|
||||
launcher.setConst( nPairs );
|
||||
@@ -1479,21 +1479,21 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
contactOut->reserve(nContacts+nPairs);
|
||||
|
||||
{
|
||||
BT_PROFILE("newContactReductionKernel");
|
||||
btBufferInfoCL bInfo[] =
|
||||
B3_PROFILE("newContactReductionKernel");
|
||||
b3BufferInfoCL bInfo[] =
|
||||
{
|
||||
btBufferInfoCL( pairs->getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( sepNormals.getBufferCL()),
|
||||
btBufferInfoCL( hasSeparatingNormals.getBufferCL()),
|
||||
btBufferInfoCL( contactOut->getBufferCL()),
|
||||
btBufferInfoCL( clippingFacesOutGPU.getBufferCL()),
|
||||
btBufferInfoCL( worldVertsB2GPU.getBufferCL()),
|
||||
btBufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
b3BufferInfoCL( pairs->getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( sepNormals.getBufferCL()),
|
||||
b3BufferInfoCL( hasSeparatingNormals.getBufferCL()),
|
||||
b3BufferInfoCL( contactOut->getBufferCL()),
|
||||
b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()),
|
||||
b3BufferInfoCL( worldVertsB2GPU.getBufferCL()),
|
||||
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
};
|
||||
|
||||
btLauncherCL launcher(m_queue, m_newContactReductionKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_newContactReductionKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(vertexFaceCapacity);
|
||||
launcher.setConst( nPairs );
|
||||
int num = nPairs;
|
||||
@@ -1514,22 +1514,22 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
|
||||
if (nPairs)
|
||||
{
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( pairs->getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( sepNormals.getBufferCL()),
|
||||
btBufferInfoCL( hasSeparatingNormals.getBufferCL()),
|
||||
btBufferInfoCL( contactOut->getBufferCL()),
|
||||
btBufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( pairs->getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( sepNormals.getBufferCL()),
|
||||
b3BufferInfoCL( hasSeparatingNormals.getBufferCL()),
|
||||
b3BufferInfoCL( contactOut->getBufferCL()),
|
||||
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
};
|
||||
btLauncherCL launcher(m_queue, m_clipHullHullKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_clipHullHullKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nPairs );
|
||||
int num = nPairs;
|
||||
launcher.launch1D( num);
|
||||
@@ -1543,23 +1543,23 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
|
||||
|
||||
if (nCompoundsPairs)
|
||||
{
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( gpuCompoundPairs.getBufferCL(), true ),
|
||||
btBufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
btBufferInfoCL( convexData.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuCompoundSepNormals.getBufferCL(),true),
|
||||
btBufferInfoCL( gpuHasCompoundSepNormals.getBufferCL(),true),
|
||||
btBufferInfoCL( contactOut->getBufferCL()),
|
||||
btBufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( gpuCompoundPairs.getBufferCL(), true ),
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
|
||||
b3BufferInfoCL( convexData.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuChildShapes.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuCompoundSepNormals.getBufferCL(),true),
|
||||
b3BufferInfoCL( gpuHasCompoundSepNormals.getBufferCL(),true),
|
||||
b3BufferInfoCL( contactOut->getBufferCL()),
|
||||
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
|
||||
};
|
||||
btLauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nCompoundsPairs );
|
||||
launcher.setConst(maxContactCapacity);
|
||||
|
||||
|
||||
@@ -2,22 +2,22 @@
|
||||
#ifndef _CONVEX_HULL_CONTACT_H
|
||||
#define _CONVEX_HULL_CONTACT_H
|
||||
|
||||
#include "parallel_primitives/host/btOpenCLArray.h"
|
||||
#include "parallel_primitives/host/b3OpenCLArray.h"
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
|
||||
#include "Bullet3Common/b3AlignedObjectArray.h"
|
||||
#include "b3ConvexUtility.h"
|
||||
#include "b3ConvexPolyhedronCL.h"
|
||||
#include "b3Collidable.h"
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
|
||||
#include "parallel_primitives/host/btInt2.h"
|
||||
#include "parallel_primitives/host/btInt4.h"
|
||||
#include "parallel_primitives/host/b3Int2.h"
|
||||
#include "parallel_primitives/host/b3Int4.h"
|
||||
#include "b3OptimizedBvh.h"
|
||||
#include "b3BvhInfo.h"
|
||||
|
||||
//#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h"
|
||||
|
||||
|
||||
struct btYetAnotherAabb
|
||||
struct b3YetAnotherAabb
|
||||
{
|
||||
union
|
||||
{
|
||||
@@ -59,37 +59,37 @@ struct GpuSatCollision
|
||||
cl_kernel m_processCompoundPairsPrimitivesKernel;
|
||||
|
||||
|
||||
btOpenCLArray<int> m_totalContactsOut;
|
||||
b3OpenCLArray<int> m_totalContactsOut;
|
||||
|
||||
GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q );
|
||||
virtual ~GpuSatCollision();
|
||||
|
||||
|
||||
void computeConvexConvexContactsGPUSAT( const btOpenCLArray<btInt2>* pairs, int nPairs,
|
||||
const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
btOpenCLArray<b3Contact4>* contactOut, int& nContacts,
|
||||
void computeConvexConvexContactsGPUSAT( const b3OpenCLArray<b3Int2>* pairs, int nPairs,
|
||||
const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
|
||||
int maxContactCapacity,
|
||||
const btOpenCLArray<b3ConvexPolyhedronCL>& hostConvexData,
|
||||
const btOpenCLArray<b3Vector3>& vertices,
|
||||
const btOpenCLArray<b3Vector3>& uniqueEdges,
|
||||
const btOpenCLArray<btGpuFace>& faces,
|
||||
const btOpenCLArray<int>& indices,
|
||||
const btOpenCLArray<b3Collidable>& gpuCollidables,
|
||||
const btOpenCLArray<btGpuChildShape>& gpuChildShapes,
|
||||
const b3OpenCLArray<b3ConvexPolyhedronCL>& hostConvexData,
|
||||
const b3OpenCLArray<b3Vector3>& vertices,
|
||||
const b3OpenCLArray<b3Vector3>& uniqueEdges,
|
||||
const b3OpenCLArray<b3GpuFace>& faces,
|
||||
const b3OpenCLArray<int>& indices,
|
||||
const b3OpenCLArray<b3Collidable>& gpuCollidables,
|
||||
const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes,
|
||||
|
||||
const btOpenCLArray<btYetAnotherAabb>& clAabbs,
|
||||
btOpenCLArray<b3Vector3>& worldVertsB1GPU,
|
||||
btOpenCLArray<btInt4>& clippingFacesOutGPU,
|
||||
btOpenCLArray<b3Vector3>& worldNormalsAGPU,
|
||||
btOpenCLArray<b3Vector3>& worldVertsA1GPU,
|
||||
btOpenCLArray<b3Vector3>& worldVertsB2GPU,
|
||||
const b3OpenCLArray<b3YetAnotherAabb>& clAabbs,
|
||||
b3OpenCLArray<b3Vector3>& worldVertsB1GPU,
|
||||
b3OpenCLArray<b3Int4>& clippingFacesOutGPU,
|
||||
b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
|
||||
b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
|
||||
b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
|
||||
b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData,
|
||||
btOpenCLArray<btQuantizedBvhNode>* treeNodesGPU,
|
||||
btOpenCLArray<btBvhSubtreeInfo>* subTreesGPU,
|
||||
btOpenCLArray<b3BvhInfo>* bvhInfo,
|
||||
b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU,
|
||||
b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU,
|
||||
b3OpenCLArray<b3BvhInfo>* bvhInfo,
|
||||
int numObjects,
|
||||
int maxTriConvexPairCapacity,
|
||||
btOpenCLArray<btInt4>& triangleConvexPairs,
|
||||
b3OpenCLArray<b3Int4>& triangleConvexPairs,
|
||||
int& numTriConvexPairsOut
|
||||
);
|
||||
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
|
||||
#include "Bullet3Common/b3Transform.h"
|
||||
|
||||
struct btGpuFace
|
||||
struct b3GpuFace
|
||||
{
|
||||
btVector4 m_plane;
|
||||
b3Vector4 m_plane;
|
||||
int m_indexOffset;
|
||||
int m_numIndices;
|
||||
};
|
||||
@@ -45,7 +45,7 @@ ATTRIBUTE_ALIGNED16(struct) b3ConvexPolyhedronCL
|
||||
//b3Vector3 pt = trans * vertices[m_vertexOffset+i];
|
||||
//b3Scalar dp = pt.dot(dir);
|
||||
b3Scalar dp = vertices[m_vertexOffset+i].dot(localDir);
|
||||
//btAssert(dp==dpL);
|
||||
//b3Assert(dp==dpL);
|
||||
if(dp < min) min = dp;
|
||||
if(dp > max) max = dp;
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices,
|
||||
b3ConvexHullComputer* convexUtil = &conv;
|
||||
|
||||
|
||||
b3AlignedObjectArray<btMyFace> tmpFaces;
|
||||
b3AlignedObjectArray<b3MyFace> tmpFaces;
|
||||
tmpFaces.resize(numFaces);
|
||||
|
||||
int numVertices = convexUtil->vertices.size();
|
||||
@@ -96,7 +96,7 @@ bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices,
|
||||
}
|
||||
else
|
||||
{
|
||||
btAssert(0);//degenerate?
|
||||
b3Assert(0);//degenerate?
|
||||
faceNormals[i].setZero();
|
||||
}
|
||||
|
||||
@@ -124,14 +124,14 @@ bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices,
|
||||
int refFace = todoFaces[todoFaces.size()-1];
|
||||
|
||||
coplanarFaceGroup.push_back(refFace);
|
||||
btMyFace& faceA = tmpFaces[refFace];
|
||||
b3MyFace& faceA = tmpFaces[refFace];
|
||||
todoFaces.pop_back();
|
||||
|
||||
b3Vector3 faceNormalA(faceA.m_plane[0],faceA.m_plane[1],faceA.m_plane[2]);
|
||||
for (int j=todoFaces.size()-1;j>=0;j--)
|
||||
{
|
||||
int i = todoFaces[j];
|
||||
btMyFace& faceB = tmpFaces[i];
|
||||
b3MyFace& faceB = tmpFaces[i];
|
||||
b3Vector3 faceNormalB(faceB.m_plane[0],faceB.m_plane[1],faceB.m_plane[2]);
|
||||
if (faceNormalA.dot(faceNormalB)>faceWeldThreshold)
|
||||
{
|
||||
@@ -153,7 +153,7 @@ bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices,
|
||||
{
|
||||
// m_polyhedron->m_faces.push_back(tmpFaces[coplanarFaceGroup[i]]);
|
||||
|
||||
btMyFace& face = tmpFaces[coplanarFaceGroup[i]];
|
||||
b3MyFace& face = tmpFaces[coplanarFaceGroup[i]];
|
||||
b3Vector3 faceNormal(face.m_plane[0],face.m_plane[1],face.m_plane[2]);
|
||||
averageFaceNormal+=faceNormal;
|
||||
for (int f=0;f<face.m_indices.size();f++)
|
||||
@@ -179,7 +179,7 @@ bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices,
|
||||
|
||||
|
||||
|
||||
btMyFace combinedFace;
|
||||
b3MyFace combinedFace;
|
||||
for (int i=0;i<4;i++)
|
||||
combinedFace.m_plane[i] = tmpFaces[coplanarFaceGroup[0]].m_plane[i];
|
||||
|
||||
@@ -212,7 +212,7 @@ bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices,
|
||||
// this vertex is rejected -- is anybody else using this vertex?
|
||||
for(int j = 0; j < tmpFaces.size(); j++) {
|
||||
|
||||
btMyFace& face = tmpFaces[j];
|
||||
b3MyFace& face = tmpFaces[j];
|
||||
// is this a face of the current coplanar group?
|
||||
bool is_in_current_group = false;
|
||||
for(int k = 0; k < coplanarFaceGroup.size(); k++) {
|
||||
@@ -249,7 +249,7 @@ bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices,
|
||||
{
|
||||
for (int i=0;i<coplanarFaceGroup.size();i++)
|
||||
{
|
||||
btMyFace face = tmpFaces[coplanarFaceGroup[i]];
|
||||
b3MyFace face = tmpFaces[coplanarFaceGroup[i]];
|
||||
m_faces.push_back(face);
|
||||
}
|
||||
|
||||
@@ -275,14 +275,14 @@ inline bool IsAlmostZero(const b3Vector3& v)
|
||||
return true;
|
||||
}
|
||||
|
||||
struct btInternalVertexPair
|
||||
struct b3InternalVertexPair
|
||||
{
|
||||
btInternalVertexPair(short int v0,short int v1)
|
||||
b3InternalVertexPair(short int v0,short int v1)
|
||||
:m_v0(v0),
|
||||
m_v1(v1)
|
||||
{
|
||||
if (m_v1>m_v0)
|
||||
btSwap(m_v0,m_v1);
|
||||
b3Swap(m_v0,m_v1);
|
||||
}
|
||||
short int m_v0;
|
||||
short int m_v1;
|
||||
@@ -290,15 +290,15 @@ struct btInternalVertexPair
|
||||
{
|
||||
return m_v0+(m_v1<<16);
|
||||
}
|
||||
bool equals(const btInternalVertexPair& other) const
|
||||
bool equals(const b3InternalVertexPair& other) const
|
||||
{
|
||||
return m_v0==other.m_v0 && m_v1==other.m_v1;
|
||||
}
|
||||
};
|
||||
|
||||
struct btInternalEdge
|
||||
struct b3InternalEdge
|
||||
{
|
||||
btInternalEdge()
|
||||
b3InternalEdge()
|
||||
:m_face0(-1),
|
||||
m_face1(-1)
|
||||
{
|
||||
@@ -339,7 +339,7 @@ bool b3ConvexUtility::testContainment() const
|
||||
void b3ConvexUtility::initialize()
|
||||
{
|
||||
|
||||
b3HashMap<btInternalVertexPair,btInternalEdge> edges;
|
||||
b3HashMap<b3InternalVertexPair,b3InternalEdge> edges;
|
||||
|
||||
b3Scalar TotalArea = 0.0f;
|
||||
|
||||
@@ -351,8 +351,8 @@ void b3ConvexUtility::initialize()
|
||||
for(int j=0;j<NbTris;j++)
|
||||
{
|
||||
int k = (j+1)%numVertices;
|
||||
btInternalVertexPair vp(m_faces[i].m_indices[j],m_faces[i].m_indices[k]);
|
||||
btInternalEdge* edptr = edges.find(vp);
|
||||
b3InternalVertexPair vp(m_faces[i].m_indices[j],m_faces[i].m_indices[k]);
|
||||
b3InternalEdge* edptr = edges.find(vp);
|
||||
b3Vector3 edge = m_vertices[vp.m_v1]-m_vertices[vp.m_v0];
|
||||
edge.normalize();
|
||||
|
||||
@@ -383,12 +383,12 @@ void b3ConvexUtility::initialize()
|
||||
if (edptr)
|
||||
{
|
||||
//TBD: figure out why I added this assert
|
||||
// btAssert(edptr->m_face0>=0);
|
||||
// btAssert(edptr->m_face1<0);
|
||||
// b3Assert(edptr->m_face0>=0);
|
||||
// b3Assert(edptr->m_face1<0);
|
||||
edptr->m_face1 = i;
|
||||
} else
|
||||
{
|
||||
btInternalEdge ed;
|
||||
b3InternalEdge ed;
|
||||
ed.m_face0 = i;
|
||||
edges.insert(vp,ed);
|
||||
}
|
||||
@@ -404,11 +404,11 @@ void b3ConvexUtility::initialize()
|
||||
for(int j=0;j<numVertices;j++)
|
||||
{
|
||||
int k = (j+1)%numVertices;
|
||||
btInternalVertexPair vp(m_faces[i].m_indices[j],m_faces[i].m_indices[k]);
|
||||
btInternalEdge* edptr = edges.find(vp);
|
||||
btAssert(edptr);
|
||||
btAssert(edptr->m_face0>=0);
|
||||
btAssert(edptr->m_face1>=0);
|
||||
b3InternalVertexPair vp(m_faces[i].m_indices[j],m_faces[i].m_indices[k]);
|
||||
b3InternalEdge* edptr = edges.find(vp);
|
||||
b3Assert(edptr);
|
||||
b3Assert(edptr->m_face0>=0);
|
||||
b3Assert(edptr->m_face1>=0);
|
||||
|
||||
int connectedFace = (edptr->m_face0==i)?edptr->m_face1:edptr->m_face0;
|
||||
m_faces[i].m_connectedFaces[j] = connectedFace;
|
||||
@@ -445,7 +445,7 @@ void b3ConvexUtility::initialize()
|
||||
for(int i=0;i<m_faces.size();i++)
|
||||
{
|
||||
const b3Vector3 Normal(m_faces[i].m_plane[0], m_faces[i].m_plane[1], m_faces[i].m_plane[2]);
|
||||
const b3Scalar dist = btFabs(m_localCenter.dot(Normal) + m_faces[i].m_plane[3]);
|
||||
const b3Scalar dist = b3Fabs(m_localCenter.dot(Normal) + m_faces[i].m_plane[3]);
|
||||
if(dist<m_radius)
|
||||
m_radius = dist;
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ subject to the following restrictions:
|
||||
#include "b3ConvexPolyhedronCL.h"
|
||||
|
||||
|
||||
struct btMyFace
|
||||
struct b3MyFace
|
||||
{
|
||||
b3AlignedObjectArray<int> m_indices;
|
||||
b3Scalar m_plane[4];
|
||||
@@ -32,7 +32,7 @@ struct btMyFace
|
||||
ATTRIBUTE_ALIGNED16(class) b3ConvexUtility
|
||||
{
|
||||
public:
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
b3Vector3 m_localCenter;
|
||||
b3Vector3 m_extents;
|
||||
@@ -41,7 +41,7 @@ ATTRIBUTE_ALIGNED16(class) b3ConvexUtility
|
||||
b3Scalar m_radius;
|
||||
|
||||
b3AlignedObjectArray<b3Vector3> m_vertices;
|
||||
b3AlignedObjectArray<btMyFace> m_faces;
|
||||
b3AlignedObjectArray<b3MyFace> m_faces;
|
||||
b3AlignedObjectArray<b3Vector3> m_uniqueEdges;
|
||||
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized
|
||||
|
||||
// NodeArray triangleNodes;
|
||||
|
||||
struct NodeTriangleCallback : public btInternalTriangleIndexCallback
|
||||
struct NodeTriangleCallback : public b3InternalTriangleIndexCallback
|
||||
{
|
||||
|
||||
NodeArray& m_triangleNodes;
|
||||
@@ -53,10 +53,10 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized
|
||||
|
||||
virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex)
|
||||
{
|
||||
btOptimizedBvhNode node;
|
||||
b3OptimizedBvhNode node;
|
||||
b3Vector3 aabbMin,aabbMax;
|
||||
aabbMin.setValue(b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT));
|
||||
aabbMax.setValue(b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT));
|
||||
aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT));
|
||||
aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT));
|
||||
aabbMin.setMin(triangle[0]);
|
||||
aabbMax.setMax(triangle[0]);
|
||||
aabbMin.setMin(triangle[1]);
|
||||
@@ -76,7 +76,7 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized
|
||||
m_triangleNodes.push_back(node);
|
||||
}
|
||||
};
|
||||
struct QuantizedNodeTriangleCallback : public btInternalTriangleIndexCallback
|
||||
struct QuantizedNodeTriangleCallback : public b3InternalTriangleIndexCallback
|
||||
{
|
||||
QuantizedNodeArray& m_triangleNodes;
|
||||
const b3QuantizedBvh* m_optimizedTree; // for quantization
|
||||
@@ -96,15 +96,15 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized
|
||||
virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex)
|
||||
{
|
||||
// The partId and triangle index must fit in the same (positive) integer
|
||||
btAssert(partId < (1<<MAX_NUM_PARTS_IN_BITS));
|
||||
btAssert(triangleIndex < (1<<(31-MAX_NUM_PARTS_IN_BITS)));
|
||||
b3Assert(partId < (1<<MAX_NUM_PARTS_IN_BITS));
|
||||
b3Assert(triangleIndex < (1<<(31-MAX_NUM_PARTS_IN_BITS)));
|
||||
//negative indices are reserved for escapeIndex
|
||||
btAssert(triangleIndex>=0);
|
||||
b3Assert(triangleIndex>=0);
|
||||
|
||||
btQuantizedBvhNode node;
|
||||
b3QuantizedBvhNode node;
|
||||
b3Vector3 aabbMin,aabbMax;
|
||||
aabbMin.setValue(b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT));
|
||||
aabbMax.setValue(b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT));
|
||||
aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT));
|
||||
aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT));
|
||||
aabbMin.setMin(triangle[0]);
|
||||
aabbMax.setMax(triangle[0]);
|
||||
aabbMin.setMin(triangle[1]);
|
||||
@@ -167,8 +167,8 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized
|
||||
{
|
||||
NodeTriangleCallback callback(m_leafNodes);
|
||||
|
||||
b3Vector3 aabbMin(b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT));
|
||||
b3Vector3 aabbMax(b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT));
|
||||
b3Vector3 aabbMin(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT));
|
||||
b3Vector3 aabbMax(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT));
|
||||
|
||||
triangles->InternalProcessAllTriangles(&callback,aabbMin,aabbMax);
|
||||
|
||||
@@ -185,7 +185,7 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized
|
||||
///if the entire tree is smaller than the subtree size, we need to create a header info for the tree
|
||||
if(m_useQuantization && !m_SubtreeHeaders.size())
|
||||
{
|
||||
btBvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();
|
||||
b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();
|
||||
subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]);
|
||||
subtree.m_rootNodeIndex = 0;
|
||||
subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex();
|
||||
@@ -216,7 +216,7 @@ void b3OptimizedBvh::refit(b3StridingMeshInterface* meshInterface,const b3Vector
|
||||
int i;
|
||||
for (i=0;i<m_SubtreeHeaders.size();i++)
|
||||
{
|
||||
btBvhSubtreeInfo& subtree = m_SubtreeHeaders[i];
|
||||
b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i];
|
||||
subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]);
|
||||
}
|
||||
|
||||
@@ -232,15 +232,15 @@ void b3OptimizedBvh::refit(b3StridingMeshInterface* meshInterface,const b3Vector
|
||||
void b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface,const b3Vector3& aabbMin,const b3Vector3& aabbMax)
|
||||
{
|
||||
//incrementally initialize quantization values
|
||||
btAssert(m_useQuantization);
|
||||
b3Assert(m_useQuantization);
|
||||
|
||||
btAssert(aabbMin.getX() > m_bvhAabbMin.getX());
|
||||
btAssert(aabbMin.getY() > m_bvhAabbMin.getY());
|
||||
btAssert(aabbMin.getZ() > m_bvhAabbMin.getZ());
|
||||
b3Assert(aabbMin.getX() > m_bvhAabbMin.getX());
|
||||
b3Assert(aabbMin.getY() > m_bvhAabbMin.getY());
|
||||
b3Assert(aabbMin.getZ() > m_bvhAabbMin.getZ());
|
||||
|
||||
btAssert(aabbMax.getX() < m_bvhAabbMax.getX());
|
||||
btAssert(aabbMax.getY() < m_bvhAabbMax.getY());
|
||||
btAssert(aabbMax.getZ() < m_bvhAabbMax.getZ());
|
||||
b3Assert(aabbMax.getX() < m_bvhAabbMax.getX());
|
||||
b3Assert(aabbMax.getY() < m_bvhAabbMax.getY());
|
||||
b3Assert(aabbMax.getZ() < m_bvhAabbMax.getZ());
|
||||
|
||||
///we should update all quantization values, using updateBvhNodes(meshInterface);
|
||||
///but we only update chunks that overlap the given aabb
|
||||
@@ -254,7 +254,7 @@ void b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface,const b
|
||||
int i;
|
||||
for (i=0;i<this->m_SubtreeHeaders.size();i++)
|
||||
{
|
||||
btBvhSubtreeInfo& subtree = m_SubtreeHeaders[i];
|
||||
b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i];
|
||||
|
||||
//PCK: unsigned instead of bool
|
||||
unsigned overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
|
||||
@@ -272,7 +272,7 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int f
|
||||
{
|
||||
(void)index;
|
||||
|
||||
btAssert(m_useQuantization);
|
||||
b3Assert(m_useQuantization);
|
||||
|
||||
int curNodeSubPart=-1;
|
||||
|
||||
@@ -295,7 +295,7 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int f
|
||||
{
|
||||
|
||||
|
||||
btQuantizedBvhNode& curNode = m_quantizedContiguousNodes[i];
|
||||
b3QuantizedBvhNode& curNode = m_quantizedContiguousNodes[i];
|
||||
if (curNode.isLeafNode())
|
||||
{
|
||||
//recalc aabb from triangle data
|
||||
@@ -308,7 +308,7 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int f
|
||||
meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase,numverts, type,stride,&indexbase,indexstride,numfaces,indicestype,nodeSubPart);
|
||||
|
||||
curNodeSubPart = nodeSubPart;
|
||||
btAssert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT);
|
||||
b3Assert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT);
|
||||
}
|
||||
//triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts,
|
||||
|
||||
@@ -336,8 +336,8 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int f
|
||||
|
||||
|
||||
|
||||
aabbMin.setValue(b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT));
|
||||
aabbMax.setValue(b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT));
|
||||
aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT));
|
||||
aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT));
|
||||
aabbMin.setMin(triangleVerts[0]);
|
||||
aabbMax.setMax(triangleVerts[0]);
|
||||
aabbMin.setMin(triangleVerts[1]);
|
||||
@@ -352,9 +352,9 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int f
|
||||
{
|
||||
//combine aabb from both children
|
||||
|
||||
btQuantizedBvhNode* leftChildNode = &m_quantizedContiguousNodes[i+1];
|
||||
b3QuantizedBvhNode* leftChildNode = &m_quantizedContiguousNodes[i+1];
|
||||
|
||||
btQuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? &m_quantizedContiguousNodes[i+2] :
|
||||
b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? &m_quantizedContiguousNodes[i+2] :
|
||||
&m_quantizedContiguousNodes[i+1+leftChildNode->getEscapeIndex()];
|
||||
|
||||
|
||||
|
||||
@@ -15,8 +15,8 @@ subject to the following restrictions:
|
||||
|
||||
///Contains contributions from Disney Studio's
|
||||
|
||||
#ifndef BT_OPTIMIZED_BVH_H
|
||||
#define BT_OPTIMIZED_BVH_H
|
||||
#ifndef B3_OPTIMIZED_BVH_H
|
||||
#define B3_OPTIMIZED_BVH_H
|
||||
|
||||
#include "b3QuantizedBvh.h"
|
||||
|
||||
@@ -28,7 +28,7 @@ ATTRIBUTE_ALIGNED16(class) b3OptimizedBvh : public b3QuantizedBvh
|
||||
{
|
||||
|
||||
public:
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
protected:
|
||||
|
||||
@@ -60,6 +60,6 @@ public:
|
||||
};
|
||||
|
||||
|
||||
#endif //BT_OPTIMIZED_BVH_H
|
||||
#endif //B3_OPTIMIZED_BVH_H
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ subject to the following restrictions:
|
||||
#define RAYAABB2
|
||||
|
||||
b3QuantizedBvh::b3QuantizedBvh() :
|
||||
m_bulletVersion(BT_BULLET_VERSION),
|
||||
m_bulletVersion(B3_BULLET_VERSION),
|
||||
m_useQuantization(false),
|
||||
m_traversalMode(TRAVERSAL_STACKLESS_CACHE_FRIENDLY)
|
||||
//m_traversalMode(TRAVERSAL_STACKLESS)
|
||||
@@ -58,7 +58,7 @@ void b3QuantizedBvh::buildInternal()
|
||||
///if the entire tree is smaller than the subtree size, we need to create a header info for the tree
|
||||
if(m_useQuantization && !m_SubtreeHeaders.size())
|
||||
{
|
||||
btBvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();
|
||||
b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();
|
||||
subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]);
|
||||
subtree.m_rootNodeIndex = 0;
|
||||
subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex();
|
||||
@@ -123,7 +123,7 @@ void b3QuantizedBvh::buildTree (int startIndex,int endIndex)
|
||||
int numIndices =endIndex-startIndex;
|
||||
int curIndex = m_curNodeIndex;
|
||||
|
||||
btAssert(numIndices>0);
|
||||
b3Assert(numIndices>0);
|
||||
|
||||
if (numIndices==1)
|
||||
{
|
||||
@@ -178,7 +178,7 @@ void b3QuantizedBvh::buildTree (int startIndex,int endIndex)
|
||||
if (m_useQuantization)
|
||||
{
|
||||
//escapeIndex is the number of nodes of this subtree
|
||||
const int sizeQuantizedNode =sizeof(btQuantizedBvhNode);
|
||||
const int sizeQuantizedNode =sizeof(b3QuantizedBvhNode);
|
||||
const int treeSizeInBytes = escapeIndex * sizeQuantizedNode;
|
||||
if (treeSizeInBytes > MAX_SUBTREE_SIZE_IN_BYTES)
|
||||
{
|
||||
@@ -195,19 +195,19 @@ void b3QuantizedBvh::buildTree (int startIndex,int endIndex)
|
||||
|
||||
void b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex)
|
||||
{
|
||||
btAssert(m_useQuantization);
|
||||
b3Assert(m_useQuantization);
|
||||
|
||||
btQuantizedBvhNode& leftChildNode = m_quantizedContiguousNodes[leftChildNodexIndex];
|
||||
b3QuantizedBvhNode& leftChildNode = m_quantizedContiguousNodes[leftChildNodexIndex];
|
||||
int leftSubTreeSize = leftChildNode.isLeafNode() ? 1 : leftChildNode.getEscapeIndex();
|
||||
int leftSubTreeSizeInBytes = leftSubTreeSize * static_cast<int>(sizeof(btQuantizedBvhNode));
|
||||
int leftSubTreeSizeInBytes = leftSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode));
|
||||
|
||||
btQuantizedBvhNode& rightChildNode = m_quantizedContiguousNodes[rightChildNodexIndex];
|
||||
b3QuantizedBvhNode& rightChildNode = m_quantizedContiguousNodes[rightChildNodexIndex];
|
||||
int rightSubTreeSize = rightChildNode.isLeafNode() ? 1 : rightChildNode.getEscapeIndex();
|
||||
int rightSubTreeSizeInBytes = rightSubTreeSize * static_cast<int>(sizeof(btQuantizedBvhNode));
|
||||
int rightSubTreeSizeInBytes = rightSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode));
|
||||
|
||||
if(leftSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES)
|
||||
{
|
||||
btBvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();
|
||||
b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();
|
||||
subtree.setAabbFromQuantizeNode(leftChildNode);
|
||||
subtree.m_rootNodeIndex = leftChildNodexIndex;
|
||||
subtree.m_subtreeSize = leftSubTreeSize;
|
||||
@@ -215,7 +215,7 @@ void b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChild
|
||||
|
||||
if(rightSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES)
|
||||
{
|
||||
btBvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();
|
||||
b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();
|
||||
subtree.setAabbFromQuantizeNode(rightChildNode);
|
||||
subtree.m_rootNodeIndex = rightChildNodexIndex;
|
||||
subtree.m_subtreeSize = rightSubTreeSize;
|
||||
@@ -274,7 +274,7 @@ int b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int sp
|
||||
|
||||
bool unbal = (splitIndex==startIndex) || (splitIndex == (endIndex));
|
||||
(void)unbal;
|
||||
btAssert(!unbal);
|
||||
b3Assert(!unbal);
|
||||
|
||||
return splitIndex;
|
||||
}
|
||||
@@ -309,7 +309,7 @@ int b3QuantizedBvh::calcSplittingAxis(int startIndex,int endIndex)
|
||||
|
||||
|
||||
|
||||
void b3QuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
|
||||
void b3QuantizedBvh::reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
|
||||
{
|
||||
//either choose recursive traversal (walkTree) or stackless (walkStacklessTree)
|
||||
|
||||
@@ -331,13 +331,13 @@ void b3QuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallb
|
||||
break;
|
||||
case TRAVERSAL_RECURSIVE:
|
||||
{
|
||||
const btQuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[0];
|
||||
const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[0];
|
||||
walkRecursiveQuantizedTreeAgainstQueryAabb(rootNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
//unsupported
|
||||
btAssert(0);
|
||||
b3Assert(0);
|
||||
}
|
||||
} else
|
||||
{
|
||||
@@ -349,11 +349,11 @@ void b3QuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallb
|
||||
int maxIterations = 0;
|
||||
|
||||
|
||||
void b3QuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
|
||||
void b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
|
||||
{
|
||||
btAssert(!m_useQuantization);
|
||||
b3Assert(!m_useQuantization);
|
||||
|
||||
const btOptimizedBvhNode* rootNode = &m_contiguousNodes[0];
|
||||
const b3OptimizedBvhNode* rootNode = &m_contiguousNodes[0];
|
||||
int escapeIndex, curIndex = 0;
|
||||
int walkIterations = 0;
|
||||
bool isLeafNode;
|
||||
@@ -363,7 +363,7 @@ void b3QuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback,const
|
||||
while (curIndex < m_curNodeIndex)
|
||||
{
|
||||
//catch bugs in tree data
|
||||
btAssert (walkIterations < m_curNodeIndex);
|
||||
b3Assert (walkIterations < m_curNodeIndex);
|
||||
|
||||
walkIterations++;
|
||||
aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg);
|
||||
@@ -394,7 +394,7 @@ void b3QuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback,const
|
||||
|
||||
/*
|
||||
///this was the original recursive traversal, before we optimized towards stackless traversal
|
||||
void b3QuantizedBvh::walkTree(btOptimizedBvhNode* rootNode,btNodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
|
||||
void b3QuantizedBvh::walkTree(b3OptimizedBvhNode* rootNode,b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
|
||||
{
|
||||
bool isLeafNode, aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMin,rootNode->m_aabbMax);
|
||||
if (aabbOverlap)
|
||||
@@ -413,9 +413,9 @@ void b3QuantizedBvh::walkTree(btOptimizedBvhNode* rootNode,btNodeOverlapCallback
|
||||
}
|
||||
*/
|
||||
|
||||
void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantizedBvhNode* currentNode,btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
|
||||
void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode,b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
|
||||
{
|
||||
btAssert(m_useQuantization);
|
||||
b3Assert(m_useQuantization);
|
||||
|
||||
bool isLeafNode;
|
||||
//PCK: unsigned instead of bool
|
||||
@@ -434,10 +434,10 @@ void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantize
|
||||
} else
|
||||
{
|
||||
//process left and right children
|
||||
const btQuantizedBvhNode* leftChildNode = currentNode+1;
|
||||
const b3QuantizedBvhNode* leftChildNode = currentNode+1;
|
||||
walkRecursiveQuantizedTreeAgainstQueryAabb(leftChildNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax);
|
||||
|
||||
const btQuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? leftChildNode+1:leftChildNode+leftChildNode->getEscapeIndex();
|
||||
const b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? leftChildNode+1:leftChildNode+leftChildNode->getEscapeIndex();
|
||||
walkRecursiveQuantizedTreeAgainstQueryAabb(rightChildNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax);
|
||||
}
|
||||
}
|
||||
@@ -445,11 +445,11 @@ void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantize
|
||||
|
||||
|
||||
|
||||
void b3QuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const
|
||||
void b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const
|
||||
{
|
||||
btAssert(!m_useQuantization);
|
||||
b3Assert(!m_useQuantization);
|
||||
|
||||
const btOptimizedBvhNode* rootNode = &m_contiguousNodes[0];
|
||||
const b3OptimizedBvhNode* rootNode = &m_contiguousNodes[0];
|
||||
int escapeIndex, curIndex = 0;
|
||||
int walkIterations = 0;
|
||||
bool isLeafNode;
|
||||
@@ -474,9 +474,9 @@ void b3QuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCall
|
||||
lambda_max = rayDir.dot(rayTarget-raySource);
|
||||
///what about division by zero? --> for a zero component of rayDir, use the LARGE_FLOAT constant as its inverse instead of dividing
|
||||
b3Vector3 rayDirectionInverse;
|
||||
rayDirectionInverse[0] = rayDir[0] == b3Scalar(0.0) ? b3Scalar(BT_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[0];
|
||||
rayDirectionInverse[1] = rayDir[1] == b3Scalar(0.0) ? b3Scalar(BT_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[1];
|
||||
rayDirectionInverse[2] = rayDir[2] == b3Scalar(0.0) ? b3Scalar(BT_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[2];
|
||||
rayDirectionInverse[0] = rayDir[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[0];
|
||||
rayDirectionInverse[1] = rayDir[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[1];
|
||||
rayDirectionInverse[2] = rayDir[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[2];
|
||||
unsigned int sign[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0};
|
||||
#endif
|
||||
|
||||
@@ -486,7 +486,7 @@ void b3QuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCall
|
||||
{
|
||||
b3Scalar param = 1.0;
|
||||
//catch bugs in tree data
|
||||
btAssert (walkIterations < m_curNodeIndex);
|
||||
b3Assert (walkIterations < m_curNodeIndex);
|
||||
|
||||
walkIterations++;
|
||||
|
||||
@@ -503,11 +503,11 @@ void b3QuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCall
|
||||
///careful with this check: need to check division by zero (above) and fix the unQuantize method
|
||||
///thanks Joerg/hiker for the reproduction case!
|
||||
///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858
|
||||
rayBoxOverlap = aabbOverlap ? btRayAabb2 (raySource, rayDirectionInverse, sign, bounds, param, 0.0f, lambda_max) : false;
|
||||
rayBoxOverlap = aabbOverlap ? b3RayAabb2 (raySource, rayDirectionInverse, sign, bounds, param, 0.0f, lambda_max) : false;
|
||||
|
||||
#else
|
||||
b3Vector3 normal;
|
||||
rayBoxOverlap = btRayAabb(raySource, rayTarget,bounds[0],bounds[1],param, normal);
|
||||
rayBoxOverlap = b3RayAabb(raySource, rayTarget,bounds[0],bounds[1],param, normal);
|
||||
#endif
|
||||
|
||||
isLeafNode = rootNode->m_escapeIndex == -1;
|
||||
@@ -537,16 +537,16 @@ void b3QuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCall
|
||||
|
||||
|
||||
|
||||
void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const
|
||||
void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const
|
||||
{
|
||||
btAssert(m_useQuantization);
|
||||
b3Assert(m_useQuantization);
|
||||
|
||||
int curIndex = startNodeIndex;
|
||||
int walkIterations = 0;
|
||||
int subTreeSize = endNodeIndex - startNodeIndex;
|
||||
(void)subTreeSize;
|
||||
|
||||
const btQuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex];
|
||||
const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex];
|
||||
int escapeIndex;
|
||||
|
||||
bool isLeafNode;
|
||||
@@ -561,9 +561,9 @@ void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
|
||||
rayDirection.normalize ();
|
||||
lambda_max = rayDirection.dot(rayTarget-raySource);
|
||||
///what about division by zero? --> just set rayDirection[i] to 1.0
|
||||
rayDirection[0] = rayDirection[0] == b3Scalar(0.0) ? b3Scalar(BT_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[0];
|
||||
rayDirection[1] = rayDirection[1] == b3Scalar(0.0) ? b3Scalar(BT_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[1];
|
||||
rayDirection[2] = rayDirection[2] == b3Scalar(0.0) ? b3Scalar(BT_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[2];
|
||||
rayDirection[0] = rayDirection[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[0];
|
||||
rayDirection[1] = rayDirection[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[1];
|
||||
rayDirection[2] = rayDirection[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[2];
|
||||
unsigned int sign[3] = { rayDirection[0] < 0.0, rayDirection[1] < 0.0, rayDirection[2] < 0.0};
|
||||
#endif
|
||||
|
||||
@@ -590,7 +590,7 @@ void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
|
||||
//some code snippet to debugDraw aabb, to visually analyze bvh structure
|
||||
static int drawPatch = 0;
|
||||
//need some global access to a debugDrawer
|
||||
extern btIDebugDraw* debugDrawerPtr;
|
||||
extern b3IDebugDraw* debugDrawerPtr;
|
||||
if (curIndex==drawPatch)
|
||||
{
|
||||
b3Vector3 aabbMin,aabbMax;
|
||||
@@ -602,7 +602,7 @@ void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
|
||||
#endif//VISUALLY_ANALYZE_BVH
|
||||
|
||||
//catch bugs in tree data
|
||||
btAssert (walkIterations < subTreeSize);
|
||||
b3Assert (walkIterations < subTreeSize);
|
||||
|
||||
walkIterations++;
|
||||
//PCK: unsigned instead of bool
|
||||
@@ -621,8 +621,8 @@ void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
|
||||
bounds[1] -= aabbMin;
|
||||
b3Vector3 normal;
|
||||
#if 0
|
||||
bool ra2 = btRayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0, lambda_max);
|
||||
bool ra = btRayAabb (raySource, rayTarget, bounds[0], bounds[1], param, normal);
|
||||
bool ra2 = b3RayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0, lambda_max);
|
||||
bool ra = b3RayAabb (raySource, rayTarget, bounds[0], bounds[1], param, normal);
|
||||
if (ra2 != ra)
|
||||
{
|
||||
printf("functions don't match\n");
|
||||
@@ -633,11 +633,11 @@ void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
|
||||
///thanks Joerg/hiker for the reproduction case!
|
||||
///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858
|
||||
|
||||
//BT_PROFILE("btRayAabb2");
|
||||
rayBoxOverlap = btRayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0f, lambda_max);
|
||||
//B3_PROFILE("b3RayAabb2");
|
||||
rayBoxOverlap = b3RayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0f, lambda_max);
|
||||
|
||||
#else
|
||||
rayBoxOverlap = true;//btRayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal);
|
||||
rayBoxOverlap = true;//b3RayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -663,16 +663,16 @@ void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
|
||||
|
||||
}
|
||||
|
||||
void b3QuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const
|
||||
void b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const
|
||||
{
|
||||
btAssert(m_useQuantization);
|
||||
b3Assert(m_useQuantization);
|
||||
|
||||
int curIndex = startNodeIndex;
|
||||
int walkIterations = 0;
|
||||
int subTreeSize = endNodeIndex - startNodeIndex;
|
||||
(void)subTreeSize;
|
||||
|
||||
const btQuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex];
|
||||
const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex];
|
||||
int escapeIndex;
|
||||
|
||||
bool isLeafNode;
|
||||
@@ -687,7 +687,7 @@ void b3QuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallb
|
||||
//some code snippet to debugDraw aabb, to visually analyze bvh structure
|
||||
static int drawPatch = 0;
|
||||
//need some global access to a debugDrawer
|
||||
extern btIDebugDraw* debugDrawerPtr;
|
||||
extern b3IDebugDraw* debugDrawerPtr;
|
||||
if (curIndex==drawPatch)
|
||||
{
|
||||
b3Vector3 aabbMin,aabbMax;
|
||||
@@ -699,7 +699,7 @@ void b3QuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallb
|
||||
#endif//VISUALLY_ANALYZE_BVH
|
||||
|
||||
//catch bugs in tree data
|
||||
btAssert (walkIterations < subTreeSize);
|
||||
b3Assert (walkIterations < subTreeSize);
|
||||
|
||||
walkIterations++;
|
||||
//PCK: unsigned instead of bool
|
||||
@@ -729,16 +729,16 @@ void b3QuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallb
|
||||
}
|
||||
|
||||
//This traversal can be called from Playstation 3 SPU
|
||||
void b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
|
||||
void b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
|
||||
{
|
||||
btAssert(m_useQuantization);
|
||||
b3Assert(m_useQuantization);
|
||||
|
||||
int i;
|
||||
|
||||
|
||||
for (i=0;i<this->m_SubtreeHeaders.size();i++)
|
||||
{
|
||||
const btBvhSubtreeInfo& subtree = m_SubtreeHeaders[i];
|
||||
const b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i];
|
||||
|
||||
//PCK: unsigned instead of bool
|
||||
unsigned overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
|
||||
@@ -752,13 +752,13 @@ void b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallba
|
||||
}
|
||||
|
||||
|
||||
void b3QuantizedBvh::reportRayOverlappingNodex (btNodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const
|
||||
void b3QuantizedBvh::reportRayOverlappingNodex (b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const
|
||||
{
|
||||
reportBoxCastOverlappingNodex(nodeCallback,raySource,rayTarget,b3Vector3(0,0,0),b3Vector3(0,0,0));
|
||||
}
|
||||
|
||||
|
||||
void b3QuantizedBvh::reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
|
||||
void b3QuantizedBvh::reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
|
||||
{
|
||||
//always use stackless
|
||||
|
||||
@@ -790,12 +790,12 @@ void b3QuantizedBvh::swapLeafNodes(int i,int splitIndex)
|
||||
{
|
||||
if (m_useQuantization)
|
||||
{
|
||||
btQuantizedBvhNode tmp = m_quantizedLeafNodes[i];
|
||||
b3QuantizedBvhNode tmp = m_quantizedLeafNodes[i];
|
||||
m_quantizedLeafNodes[i] = m_quantizedLeafNodes[splitIndex];
|
||||
m_quantizedLeafNodes[splitIndex] = tmp;
|
||||
} else
|
||||
{
|
||||
btOptimizedBvhNode tmp = m_leafNodes[i];
|
||||
b3OptimizedBvhNode tmp = m_leafNodes[i];
|
||||
m_leafNodes[i] = m_leafNodes[splitIndex];
|
||||
m_leafNodes[splitIndex] = tmp;
|
||||
}
|
||||
@@ -833,23 +833,23 @@ unsigned int b3QuantizedBvh::getAlignmentSerializationPadding()
|
||||
unsigned b3QuantizedBvh::calculateSerializeBufferSize() const
|
||||
{
|
||||
unsigned baseSize = sizeof(b3QuantizedBvh) + getAlignmentSerializationPadding();
|
||||
baseSize += sizeof(btBvhSubtreeInfo) * m_subtreeHeaderCount;
|
||||
baseSize += sizeof(b3BvhSubtreeInfo) * m_subtreeHeaderCount;
|
||||
if (m_useQuantization)
|
||||
{
|
||||
return baseSize + m_curNodeIndex * sizeof(btQuantizedBvhNode);
|
||||
return baseSize + m_curNodeIndex * sizeof(b3QuantizedBvhNode);
|
||||
}
|
||||
return baseSize + m_curNodeIndex * sizeof(btOptimizedBvhNode);
|
||||
return baseSize + m_curNodeIndex * sizeof(b3OptimizedBvhNode);
|
||||
}
|
||||
|
||||
bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const
|
||||
{
|
||||
btAssert(m_subtreeHeaderCount == m_SubtreeHeaders.size());
|
||||
b3Assert(m_subtreeHeaderCount == m_SubtreeHeaders.size());
|
||||
m_subtreeHeaderCount = m_SubtreeHeaders.size();
|
||||
|
||||
/* if (i_dataBufferSize < calculateSerializeBufferSize() || o_alignedDataBuffer == NULL || (((unsigned)o_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0))
|
||||
{
|
||||
///check alignedment for buffer?
|
||||
btAssert(0);
|
||||
b3Assert(0);
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
@@ -862,15 +862,15 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
|
||||
|
||||
if (i_swapEndian)
|
||||
{
|
||||
targetBvh->m_curNodeIndex = static_cast<int>(btSwapEndian(m_curNodeIndex));
|
||||
targetBvh->m_curNodeIndex = static_cast<int>(b3SwapEndian(m_curNodeIndex));
|
||||
|
||||
|
||||
btSwapVector3Endian(m_bvhAabbMin,targetBvh->m_bvhAabbMin);
|
||||
btSwapVector3Endian(m_bvhAabbMax,targetBvh->m_bvhAabbMax);
|
||||
btSwapVector3Endian(m_bvhQuantization,targetBvh->m_bvhQuantization);
|
||||
b3SwapVector3Endian(m_bvhAabbMin,targetBvh->m_bvhAabbMin);
|
||||
b3SwapVector3Endian(m_bvhAabbMax,targetBvh->m_bvhAabbMax);
|
||||
b3SwapVector3Endian(m_bvhQuantization,targetBvh->m_bvhQuantization);
|
||||
|
||||
targetBvh->m_traversalMode = (btTraversalMode)btSwapEndian(m_traversalMode);
|
||||
targetBvh->m_subtreeHeaderCount = static_cast<int>(btSwapEndian(m_subtreeHeaderCount));
|
||||
targetBvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(m_traversalMode);
|
||||
targetBvh->m_subtreeHeaderCount = static_cast<int>(b3SwapEndian(m_subtreeHeaderCount));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -900,15 +900,15 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
|
||||
{
|
||||
for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
|
||||
{
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]);
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]);
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]);
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]);
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]);
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]);
|
||||
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]);
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]);
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]);
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]);
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]);
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]);
|
||||
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex));
|
||||
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex));
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -929,7 +929,7 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
|
||||
|
||||
}
|
||||
}
|
||||
nodeData += sizeof(btQuantizedBvhNode) * nodeCount;
|
||||
nodeData += sizeof(b3QuantizedBvhNode) * nodeCount;
|
||||
|
||||
// this clears the pointer in the member variable it doesn't really do anything to the data
|
||||
// it does call the destructor on the contained objects, but they are all classes with no destructor defined
|
||||
@@ -944,12 +944,12 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
|
||||
{
|
||||
for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
|
||||
{
|
||||
btSwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMinOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg);
|
||||
btSwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMaxOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg);
|
||||
b3SwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMinOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg);
|
||||
b3SwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMaxOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg);
|
||||
|
||||
targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(btSwapEndian(m_contiguousNodes[nodeIndex].m_escapeIndex));
|
||||
targetBvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(btSwapEndian(m_contiguousNodes[nodeIndex].m_subPart));
|
||||
targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(btSwapEndian(m_contiguousNodes[nodeIndex].m_triangleIndex));
|
||||
targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_escapeIndex));
|
||||
targetBvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_subPart));
|
||||
targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_triangleIndex));
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -964,7 +964,7 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
|
||||
targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = m_contiguousNodes[nodeIndex].m_triangleIndex;
|
||||
}
|
||||
}
|
||||
nodeData += sizeof(btOptimizedBvhNode) * nodeCount;
|
||||
nodeData += sizeof(b3OptimizedBvhNode) * nodeCount;
|
||||
|
||||
// this clears the pointer in the member variable it doesn't really do anything to the data
|
||||
// it does call the destructor on the contained objects, but they are all classes with no destructor defined
|
||||
@@ -981,16 +981,16 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
|
||||
{
|
||||
for (int i = 0; i < m_subtreeHeaderCount; i++)
|
||||
{
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[0]);
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[1]);
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[2]);
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[0]);
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[1]);
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[2]);
|
||||
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[0]);
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[1]);
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[2]);
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[0]);
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[1]);
|
||||
targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[2]);
|
||||
|
||||
targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast<int>(btSwapEndian(m_SubtreeHeaders[i].m_rootNodeIndex));
|
||||
targetBvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(btSwapEndian(m_SubtreeHeaders[i].m_subtreeSize));
|
||||
targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast<int>(b3SwapEndian(m_SubtreeHeaders[i].m_rootNodeIndex));
|
||||
targetBvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(b3SwapEndian(m_SubtreeHeaders[i].m_subtreeSize));
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -1014,7 +1014,7 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
|
||||
targetBvh->m_SubtreeHeaders[i].m_padding[2] = 0;
|
||||
}
|
||||
}
|
||||
nodeData += sizeof(btBvhSubtreeInfo) * m_subtreeHeaderCount;
|
||||
nodeData += sizeof(b3BvhSubtreeInfo) * m_subtreeHeaderCount;
|
||||
|
||||
// this clears the pointer in the member variable it doesn't really do anything to the data
|
||||
// it does call the destructor on the contained objects, but they are all classes with no destructor defined
|
||||
@@ -1038,18 +1038,18 @@ b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un
|
||||
|
||||
if (i_swapEndian)
|
||||
{
|
||||
bvh->m_curNodeIndex = static_cast<int>(btSwapEndian(bvh->m_curNodeIndex));
|
||||
bvh->m_curNodeIndex = static_cast<int>(b3SwapEndian(bvh->m_curNodeIndex));
|
||||
|
||||
btUnSwapVector3Endian(bvh->m_bvhAabbMin);
|
||||
btUnSwapVector3Endian(bvh->m_bvhAabbMax);
|
||||
btUnSwapVector3Endian(bvh->m_bvhQuantization);
|
||||
b3UnSwapVector3Endian(bvh->m_bvhAabbMin);
|
||||
b3UnSwapVector3Endian(bvh->m_bvhAabbMax);
|
||||
b3UnSwapVector3Endian(bvh->m_bvhQuantization);
|
||||
|
||||
bvh->m_traversalMode = (btTraversalMode)btSwapEndian(bvh->m_traversalMode);
|
||||
bvh->m_subtreeHeaderCount = static_cast<int>(btSwapEndian(bvh->m_subtreeHeaderCount));
|
||||
bvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(bvh->m_traversalMode);
|
||||
bvh->m_subtreeHeaderCount = static_cast<int>(b3SwapEndian(bvh->m_subtreeHeaderCount));
|
||||
}
|
||||
|
||||
unsigned int calculatedBufSize = bvh->calculateSerializeBufferSize();
|
||||
btAssert(calculatedBufSize <= i_dataBufferSize);
|
||||
b3Assert(calculatedBufSize <= i_dataBufferSize);
|
||||
|
||||
if (calculatedBufSize > i_dataBufferSize)
|
||||
{
|
||||
@@ -1076,18 +1076,18 @@ b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un
|
||||
{
|
||||
for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
|
||||
{
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]);
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]);
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]);
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]);
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]);
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]);
|
||||
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]);
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]);
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]);
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]);
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]);
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]);
|
||||
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex));
|
||||
bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex));
|
||||
}
|
||||
}
|
||||
nodeData += sizeof(btQuantizedBvhNode) * nodeCount;
|
||||
nodeData += sizeof(b3QuantizedBvhNode) * nodeCount;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1097,15 +1097,15 @@ b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un
|
||||
{
|
||||
for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
|
||||
{
|
||||
btUnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg);
|
||||
btUnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg);
|
||||
b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg);
|
||||
b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg);
|
||||
|
||||
bvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(btSwapEndian(bvh->m_contiguousNodes[nodeIndex].m_escapeIndex));
|
||||
bvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(btSwapEndian(bvh->m_contiguousNodes[nodeIndex].m_subPart));
|
||||
bvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(btSwapEndian(bvh->m_contiguousNodes[nodeIndex].m_triangleIndex));
|
||||
bvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_escapeIndex));
|
||||
bvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_subPart));
|
||||
bvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_triangleIndex));
|
||||
}
|
||||
}
|
||||
nodeData += sizeof(btOptimizedBvhNode) * nodeCount;
|
||||
nodeData += sizeof(b3OptimizedBvhNode) * nodeCount;
|
||||
}
|
||||
|
||||
sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;
|
||||
@@ -1117,16 +1117,16 @@ b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un
|
||||
{
|
||||
for (int i = 0; i < bvh->m_subtreeHeaderCount; i++)
|
||||
{
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0]);
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1]);
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2]);
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0]);
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1]);
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2]);
|
||||
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0]);
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1]);
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2]);
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0]);
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1]);
|
||||
bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2]);
|
||||
|
||||
bvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast<int>(btSwapEndian(bvh->m_SubtreeHeaders[i].m_rootNodeIndex));
|
||||
bvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(btSwapEndian(bvh->m_SubtreeHeaders[i].m_subtreeSize));
|
||||
bvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast<int>(b3SwapEndian(bvh->m_SubtreeHeaders[i].m_rootNodeIndex));
|
||||
bvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(b3SwapEndian(bvh->m_SubtreeHeaders[i].m_subtreeSize));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1138,12 +1138,12 @@ b3QuantizedBvh::b3QuantizedBvh(b3QuantizedBvh &self, bool /* ownsMemory */) :
|
||||
m_bvhAabbMin(self.m_bvhAabbMin),
|
||||
m_bvhAabbMax(self.m_bvhAabbMax),
|
||||
m_bvhQuantization(self.m_bvhQuantization),
|
||||
m_bulletVersion(BT_BULLET_VERSION)
|
||||
m_bulletVersion(B3_BULLET_VERSION)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void b3QuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData)
|
||||
void b3QuantizedBvh::deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedBvhFloatData)
|
||||
{
|
||||
m_bvhAabbMax.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMax);
|
||||
m_bvhAabbMin.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMin);
|
||||
@@ -1158,7 +1158,7 @@ void b3QuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedB
|
||||
|
||||
if (numElem)
|
||||
{
|
||||
btOptimizedBvhNodeFloatData* memPtr = quantizedBvhFloatData.m_contiguousNodesPtr;
|
||||
b3OptimizedBvhNodeFloatData* memPtr = quantizedBvhFloatData.m_contiguousNodesPtr;
|
||||
|
||||
for (int i=0;i<numElem;i++,memPtr++)
|
||||
{
|
||||
@@ -1177,7 +1177,7 @@ void b3QuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedB
|
||||
|
||||
if (numElem)
|
||||
{
|
||||
btQuantizedBvhNodeData* memPtr = quantizedBvhFloatData.m_quantizedContiguousNodesPtr;
|
||||
b3QuantizedBvhNodeData* memPtr = quantizedBvhFloatData.m_quantizedContiguousNodesPtr;
|
||||
for (int i=0;i<numElem;i++,memPtr++)
|
||||
{
|
||||
m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex;
|
||||
@@ -1191,14 +1191,14 @@ void b3QuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedB
|
||||
}
|
||||
}
|
||||
|
||||
m_traversalMode = btTraversalMode(quantizedBvhFloatData.m_traversalMode);
|
||||
m_traversalMode = b3TraversalMode(quantizedBvhFloatData.m_traversalMode);
|
||||
|
||||
{
|
||||
int numElem = quantizedBvhFloatData.m_numSubtreeHeaders;
|
||||
m_SubtreeHeaders.resize(numElem);
|
||||
if (numElem)
|
||||
{
|
||||
btBvhSubtreeInfoData* memPtr = quantizedBvhFloatData.m_subTreeInfoPtr;
|
||||
b3BvhSubtreeInfoData* memPtr = quantizedBvhFloatData.m_subTreeInfoPtr;
|
||||
for (int i=0;i<numElem;i++,memPtr++)
|
||||
{
|
||||
m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0] ;
|
||||
@@ -1214,7 +1214,7 @@ void b3QuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedB
|
||||
}
|
||||
}
|
||||
|
||||
void b3QuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData)
|
||||
void b3QuantizedBvh::deSerializeDouble(struct b3QuantizedBvhDoubleData& quantizedBvhDoubleData)
|
||||
{
|
||||
m_bvhAabbMax.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMax);
|
||||
m_bvhAabbMin.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMin);
|
||||
@@ -1229,7 +1229,7 @@ void b3QuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantize
|
||||
|
||||
if (numElem)
|
||||
{
|
||||
btOptimizedBvhNodeDoubleData* memPtr = quantizedBvhDoubleData.m_contiguousNodesPtr;
|
||||
b3OptimizedBvhNodeDoubleData* memPtr = quantizedBvhDoubleData.m_contiguousNodesPtr;
|
||||
|
||||
for (int i=0;i<numElem;i++,memPtr++)
|
||||
{
|
||||
@@ -1248,7 +1248,7 @@ void b3QuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantize
|
||||
|
||||
if (numElem)
|
||||
{
|
||||
btQuantizedBvhNodeData* memPtr = quantizedBvhDoubleData.m_quantizedContiguousNodesPtr;
|
||||
b3QuantizedBvhNodeData* memPtr = quantizedBvhDoubleData.m_quantizedContiguousNodesPtr;
|
||||
for (int i=0;i<numElem;i++,memPtr++)
|
||||
{
|
||||
m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex;
|
||||
@@ -1262,14 +1262,14 @@ void b3QuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantize
|
||||
}
|
||||
}
|
||||
|
||||
m_traversalMode = btTraversalMode(quantizedBvhDoubleData.m_traversalMode);
|
||||
m_traversalMode = b3TraversalMode(quantizedBvhDoubleData.m_traversalMode);
|
||||
|
||||
{
|
||||
int numElem = quantizedBvhDoubleData.m_numSubtreeHeaders;
|
||||
m_SubtreeHeaders.resize(numElem);
|
||||
if (numElem)
|
||||
{
|
||||
btBvhSubtreeInfoData* memPtr = quantizedBvhDoubleData.m_subTreeInfoPtr;
|
||||
b3BvhSubtreeInfoData* memPtr = quantizedBvhDoubleData.m_subTreeInfoPtr;
|
||||
for (int i=0;i<numElem;i++,memPtr++)
|
||||
{
|
||||
m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0] ;
|
||||
@@ -1289,9 +1289,9 @@ void b3QuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantize
|
||||
|
||||
|
||||
///fills the dataBuffer and returns the struct name (and 0 on failure)
|
||||
const char* b3QuantizedBvh::serialize(void* dataBuffer, btSerializer* serializer) const
|
||||
const char* b3QuantizedBvh::serialize(void* dataBuffer, b3Serializer* serializer) const
|
||||
{
|
||||
btAssert(0);
|
||||
b3Assert(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -13,10 +13,10 @@ subject to the following restrictions:
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef BT_QUANTIZED_BVH_H
|
||||
#define BT_QUANTIZED_BVH_H
|
||||
#ifndef B3_QUANTIZED_BVH_H
|
||||
#define B3_QUANTIZED_BVH_H
|
||||
|
||||
class btSerializer;
|
||||
class b3Serializer;
|
||||
|
||||
//#define DEBUG_CHECK_DEQUANTIZATION 1
|
||||
#ifdef DEBUG_CHECK_DEQUANTIZATION
|
||||
@@ -31,14 +31,14 @@ class btSerializer;
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
#include "Bullet3Common/b3AlignedAllocator.h"
|
||||
|
||||
#ifdef BT_USE_DOUBLE_PRECISION
|
||||
#define btQuantizedBvhData btQuantizedBvhDoubleData
|
||||
#define btOptimizedBvhNodeData btOptimizedBvhNodeDoubleData
|
||||
#define btQuantizedBvhDataName "btQuantizedBvhDoubleData"
|
||||
#ifdef B3_USE_DOUBLE_PRECISION
|
||||
#define b3QuantizedBvhData b3QuantizedBvhDoubleData
|
||||
#define b3OptimizedBvhNodeData b3OptimizedBvhNodeDoubleData
|
||||
#define b3QuantizedBvhDataName "b3QuantizedBvhDoubleData"
|
||||
#else
|
||||
#define btQuantizedBvhData btQuantizedBvhFloatData
|
||||
#define btOptimizedBvhNodeData btOptimizedBvhNodeFloatData
|
||||
#define btQuantizedBvhDataName "btQuantizedBvhFloatData"
|
||||
#define b3QuantizedBvhData b3QuantizedBvhFloatData
|
||||
#define b3OptimizedBvhNodeData b3OptimizedBvhNodeFloatData
|
||||
#define b3QuantizedBvhDataName "b3QuantizedBvhFloatData"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -53,11 +53,11 @@ class btSerializer;
|
||||
// actually) triangles each (since the sign bit is reserved
|
||||
#define MAX_NUM_PARTS_IN_BITS 10
|
||||
|
||||
///btQuantizedBvhNode is a compressed aabb node, 16 bytes.
|
||||
///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.
|
||||
///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).
|
||||
ATTRIBUTE_ALIGNED16 (struct) btQuantizedBvhNode
|
||||
ATTRIBUTE_ALIGNED16 (struct) b3QuantizedBvhNode
|
||||
{
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
//12 bytes
|
||||
unsigned short int m_quantizedAabbMin[3];
|
||||
@@ -72,12 +72,12 @@ ATTRIBUTE_ALIGNED16 (struct) btQuantizedBvhNode
|
||||
}
|
||||
int getEscapeIndex() const
|
||||
{
|
||||
btAssert(!isLeafNode());
|
||||
b3Assert(!isLeafNode());
|
||||
return -m_escapeIndexOrTriangleIndex;
|
||||
}
|
||||
int getTriangleIndex() const
|
||||
{
|
||||
btAssert(isLeafNode());
|
||||
b3Assert(isLeafNode());
|
||||
unsigned int x=0;
|
||||
unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);
|
||||
// Get only the lower bits where the triangle index is stored
|
||||
@@ -85,18 +85,18 @@ ATTRIBUTE_ALIGNED16 (struct) btQuantizedBvhNode
|
||||
}
|
||||
int getPartId() const
|
||||
{
|
||||
btAssert(isLeafNode());
|
||||
b3Assert(isLeafNode());
|
||||
// Get only the highest bits where the part index is stored
|
||||
return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
/// btOptimizedBvhNode contains both internal and leaf node information.
|
||||
/// b3OptimizedBvhNode contains both internal and leaf node information.
|
||||
/// Total node size is 44 bytes / node. You can use the compressed version of 16 bytes.
|
||||
ATTRIBUTE_ALIGNED16 (struct) btOptimizedBvhNode
|
||||
ATTRIBUTE_ALIGNED16 (struct) b3OptimizedBvhNode
|
||||
{
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
//32 bytes
|
||||
b3Vector3 m_aabbMinOrg;
|
||||
@@ -115,11 +115,11 @@ ATTRIBUTE_ALIGNED16 (struct) btOptimizedBvhNode
|
||||
};
|
||||
|
||||
|
||||
///btBvhSubtreeInfo provides info to gather a subtree of limited size
|
||||
ATTRIBUTE_ALIGNED16(class) btBvhSubtreeInfo
|
||||
///b3BvhSubtreeInfo provides info to gather a subtree of limited size
|
||||
ATTRIBUTE_ALIGNED16(class) b3BvhSubtreeInfo
|
||||
{
|
||||
public:
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
//12 bytes
|
||||
unsigned short int m_quantizedAabbMin[3];
|
||||
@@ -130,13 +130,13 @@ public:
|
||||
int m_subtreeSize;
|
||||
int m_padding[3];
|
||||
|
||||
btBvhSubtreeInfo()
|
||||
b3BvhSubtreeInfo()
|
||||
{
|
||||
//memset(&m_padding[0], 0, sizeof(m_padding));
|
||||
}
|
||||
|
||||
|
||||
void setAabbFromQuantizeNode(const btQuantizedBvhNode& quantizedNode)
|
||||
void setAabbFromQuantizeNode(const b3QuantizedBvhNode& quantizedNode)
|
||||
{
|
||||
m_quantizedAabbMin[0] = quantizedNode.m_quantizedAabbMin[0];
|
||||
m_quantizedAabbMin[1] = quantizedNode.m_quantizedAabbMin[1];
|
||||
@@ -149,10 +149,10 @@ public:
|
||||
;
|
||||
|
||||
|
||||
class btNodeOverlapCallback
|
||||
class b3NodeOverlapCallback
|
||||
{
|
||||
public:
|
||||
virtual ~btNodeOverlapCallback() {};
|
||||
virtual ~b3NodeOverlapCallback() {};
|
||||
|
||||
virtual void processNode(int subPart, int triangleIndex) = 0;
|
||||
};
|
||||
@@ -163,18 +163,18 @@ public:
|
||||
|
||||
|
||||
///for code readability:
|
||||
typedef b3AlignedObjectArray<btOptimizedBvhNode> NodeArray;
|
||||
typedef b3AlignedObjectArray<btQuantizedBvhNode> QuantizedNodeArray;
|
||||
typedef b3AlignedObjectArray<btBvhSubtreeInfo> BvhSubtreeInfoArray;
|
||||
typedef b3AlignedObjectArray<b3OptimizedBvhNode> NodeArray;
|
||||
typedef b3AlignedObjectArray<b3QuantizedBvhNode> QuantizedNodeArray;
|
||||
typedef b3AlignedObjectArray<b3BvhSubtreeInfo> BvhSubtreeInfoArray;
|
||||
|
||||
|
||||
///The b3QuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU.
|
||||
///It is used by the btBvhTriangleMeshShape as midphase, and by the btMultiSapBroadphase.
|
||||
///It is used by the b3BvhTriangleMeshShape as midphase, and by the b3MultiSapBroadphase.
|
||||
///It is recommended to use quantization for better performance and lower memory requirements.
|
||||
ATTRIBUTE_ALIGNED16(class) b3QuantizedBvh
|
||||
{
|
||||
public:
|
||||
enum btTraversalMode
|
||||
enum b3TraversalMode
|
||||
{
|
||||
TRAVERSAL_STACKLESS = 0,
|
||||
TRAVERSAL_STACKLESS_CACHE_FRIENDLY,
|
||||
@@ -202,7 +202,7 @@ protected:
|
||||
QuantizedNodeArray m_quantizedLeafNodes;
|
||||
QuantizedNodeArray m_quantizedContiguousNodes;
|
||||
|
||||
btTraversalMode m_traversalMode;
|
||||
b3TraversalMode m_traversalMode;
|
||||
BvhSubtreeInfoArray m_SubtreeHeaders;
|
||||
|
||||
//This is only used for serialization so we don't have to add serialization directly to b3AlignedObjectArray
|
||||
@@ -310,20 +310,20 @@ protected:
|
||||
|
||||
int sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis);
|
||||
|
||||
void walkStacklessTree(btNodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const;
|
||||
void walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const;
|
||||
|
||||
void walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const;
|
||||
void walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const;
|
||||
void walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const;
|
||||
void walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const;
|
||||
void walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const;
|
||||
void walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const;
|
||||
|
||||
///tree traversal designed for small-memory processors like PS3 SPU
|
||||
void walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const;
|
||||
void walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const;
|
||||
|
||||
///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal
|
||||
void walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantizedBvhNode* currentNode,btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const;
|
||||
void walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode,b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const;
|
||||
|
||||
///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal
|
||||
void walkRecursiveQuantizedTreeAgainstQuantizedTree(const btQuantizedBvhNode* treeNodeA,const btQuantizedBvhNode* treeNodeB,btNodeOverlapCallback* nodeCallback) const;
|
||||
void walkRecursiveQuantizedTreeAgainstQuantizedTree(const b3QuantizedBvhNode* treeNodeA,const b3QuantizedBvhNode* treeNodeB,b3NodeOverlapCallback* nodeCallback) const;
|
||||
|
||||
|
||||
|
||||
@@ -332,7 +332,7 @@ protected:
|
||||
|
||||
public:
|
||||
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
b3QuantizedBvh();
|
||||
|
||||
@@ -346,22 +346,22 @@ public:
|
||||
void buildInternal();
|
||||
///***************************************** expert/internal use only *************************
|
||||
|
||||
void reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const;
|
||||
void reportRayOverlappingNodex (btNodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const;
|
||||
void reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin,const b3Vector3& aabbMax) const;
|
||||
void reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const;
|
||||
void reportRayOverlappingNodex (b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const;
|
||||
void reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin,const b3Vector3& aabbMax) const;
|
||||
|
||||
SIMD_FORCE_INLINE void quantize(unsigned short* out, const b3Vector3& point,int isMax) const
|
||||
{
|
||||
|
||||
btAssert(m_useQuantization);
|
||||
b3Assert(m_useQuantization);
|
||||
|
||||
btAssert(point.getX() <= m_bvhAabbMax.getX());
|
||||
btAssert(point.getY() <= m_bvhAabbMax.getY());
|
||||
btAssert(point.getZ() <= m_bvhAabbMax.getZ());
|
||||
b3Assert(point.getX() <= m_bvhAabbMax.getX());
|
||||
b3Assert(point.getY() <= m_bvhAabbMax.getY());
|
||||
b3Assert(point.getZ() <= m_bvhAabbMax.getZ());
|
||||
|
||||
btAssert(point.getX() >= m_bvhAabbMin.getX());
|
||||
btAssert(point.getY() >= m_bvhAabbMin.getY());
|
||||
btAssert(point.getZ() >= m_bvhAabbMin.getZ());
|
||||
b3Assert(point.getX() >= m_bvhAabbMin.getX());
|
||||
b3Assert(point.getY() >= m_bvhAabbMin.getY());
|
||||
b3Assert(point.getZ() >= m_bvhAabbMin.getZ());
|
||||
|
||||
b3Vector3 v = (point - m_bvhAabbMin) * m_bvhQuantization;
|
||||
///Make sure rounding is done in a way that unQuantize(quantizeWithClamp(...)) is conservative
|
||||
@@ -420,7 +420,7 @@ public:
|
||||
SIMD_FORCE_INLINE void quantizeWithClamp(unsigned short* out, const b3Vector3& point2,int isMax) const
|
||||
{
|
||||
|
||||
btAssert(m_useQuantization);
|
||||
b3Assert(m_useQuantization);
|
||||
|
||||
b3Vector3 clampedPoint(point2);
|
||||
clampedPoint.setMax(m_bvhAabbMin);
|
||||
@@ -442,7 +442,7 @@ public:
|
||||
}
|
||||
|
||||
///setTraversalMode let's you choose between stackless, recursive or stackless cache friendly tree traversal. Note this is only implemented for quantized trees.
|
||||
void setTraversalMode(btTraversalMode traversalMode)
|
||||
void setTraversalMode(b3TraversalMode traversalMode)
|
||||
{
|
||||
m_traversalMode = traversalMode;
|
||||
}
|
||||
@@ -477,11 +477,11 @@ public:
|
||||
virtual int calculateSerializeBufferSizeNew() const;
|
||||
|
||||
///fills the dataBuffer and returns the struct name (and 0 on failure)
|
||||
virtual const char* serialize(void* dataBuffer, btSerializer* serializer) const;
|
||||
virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const;
|
||||
|
||||
virtual void deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData);
|
||||
virtual void deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedBvhFloatData);
|
||||
|
||||
virtual void deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData);
|
||||
virtual void deSerializeDouble(struct b3QuantizedBvhDoubleData& quantizedBvhDoubleData);
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
@@ -501,7 +501,7 @@ private:
|
||||
;
|
||||
|
||||
|
||||
struct btBvhSubtreeInfoData
|
||||
struct b3BvhSubtreeInfoData
|
||||
{
|
||||
int m_rootNodeIndex;
|
||||
int m_subtreeSize;
|
||||
@@ -509,20 +509,20 @@ struct btBvhSubtreeInfoData
|
||||
unsigned short m_quantizedAabbMax[3];
|
||||
};
|
||||
|
||||
struct btOptimizedBvhNodeFloatData
|
||||
struct b3OptimizedBvhNodeFloatData
|
||||
{
|
||||
btVector3FloatData m_aabbMinOrg;
|
||||
btVector3FloatData m_aabbMaxOrg;
|
||||
b3Vector3FloatData m_aabbMinOrg;
|
||||
b3Vector3FloatData m_aabbMaxOrg;
|
||||
int m_escapeIndex;
|
||||
int m_subPart;
|
||||
int m_triangleIndex;
|
||||
char m_pad[4];
|
||||
};
|
||||
|
||||
struct btOptimizedBvhNodeDoubleData
|
||||
struct b3OptimizedBvhNodeDoubleData
|
||||
{
|
||||
btVector3DoubleData m_aabbMinOrg;
|
||||
btVector3DoubleData m_aabbMaxOrg;
|
||||
b3Vector3DoubleData m_aabbMinOrg;
|
||||
b3Vector3DoubleData m_aabbMaxOrg;
|
||||
int m_escapeIndex;
|
||||
int m_subPart;
|
||||
int m_triangleIndex;
|
||||
@@ -530,53 +530,53 @@ struct btOptimizedBvhNodeDoubleData
|
||||
};
|
||||
|
||||
|
||||
struct btQuantizedBvhNodeData
|
||||
struct b3QuantizedBvhNodeData
|
||||
{
|
||||
unsigned short m_quantizedAabbMin[3];
|
||||
unsigned short m_quantizedAabbMax[3];
|
||||
int m_escapeIndexOrTriangleIndex;
|
||||
};
|
||||
|
||||
struct btQuantizedBvhFloatData
|
||||
struct b3QuantizedBvhFloatData
|
||||
{
|
||||
btVector3FloatData m_bvhAabbMin;
|
||||
btVector3FloatData m_bvhAabbMax;
|
||||
btVector3FloatData m_bvhQuantization;
|
||||
b3Vector3FloatData m_bvhAabbMin;
|
||||
b3Vector3FloatData m_bvhAabbMax;
|
||||
b3Vector3FloatData m_bvhQuantization;
|
||||
int m_curNodeIndex;
|
||||
int m_useQuantization;
|
||||
int m_numContiguousLeafNodes;
|
||||
int m_numQuantizedContiguousNodes;
|
||||
btOptimizedBvhNodeFloatData *m_contiguousNodesPtr;
|
||||
btQuantizedBvhNodeData *m_quantizedContiguousNodesPtr;
|
||||
btBvhSubtreeInfoData *m_subTreeInfoPtr;
|
||||
b3OptimizedBvhNodeFloatData *m_contiguousNodesPtr;
|
||||
b3QuantizedBvhNodeData *m_quantizedContiguousNodesPtr;
|
||||
b3BvhSubtreeInfoData *m_subTreeInfoPtr;
|
||||
int m_traversalMode;
|
||||
int m_numSubtreeHeaders;
|
||||
|
||||
};
|
||||
|
||||
struct btQuantizedBvhDoubleData
|
||||
struct b3QuantizedBvhDoubleData
|
||||
{
|
||||
btVector3DoubleData m_bvhAabbMin;
|
||||
btVector3DoubleData m_bvhAabbMax;
|
||||
btVector3DoubleData m_bvhQuantization;
|
||||
b3Vector3DoubleData m_bvhAabbMin;
|
||||
b3Vector3DoubleData m_bvhAabbMax;
|
||||
b3Vector3DoubleData m_bvhQuantization;
|
||||
int m_curNodeIndex;
|
||||
int m_useQuantization;
|
||||
int m_numContiguousLeafNodes;
|
||||
int m_numQuantizedContiguousNodes;
|
||||
btOptimizedBvhNodeDoubleData *m_contiguousNodesPtr;
|
||||
btQuantizedBvhNodeData *m_quantizedContiguousNodesPtr;
|
||||
b3OptimizedBvhNodeDoubleData *m_contiguousNodesPtr;
|
||||
b3QuantizedBvhNodeData *m_quantizedContiguousNodesPtr;
|
||||
|
||||
int m_traversalMode;
|
||||
int m_numSubtreeHeaders;
|
||||
btBvhSubtreeInfoData *m_subTreeInfoPtr;
|
||||
b3BvhSubtreeInfoData *m_subTreeInfoPtr;
|
||||
};
|
||||
|
||||
|
||||
SIMD_FORCE_INLINE int b3QuantizedBvh::calculateSerializeBufferSizeNew() const
|
||||
{
|
||||
return sizeof(btQuantizedBvhData);
|
||||
return sizeof(b3QuantizedBvhData);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif //BT_QUANTIZED_BVH_H
|
||||
#endif //B3_QUANTIZED_BVH_H
|
||||
|
||||
@@ -22,7 +22,7 @@ b3StridingMeshInterface::~b3StridingMeshInterface()
|
||||
}
|
||||
|
||||
|
||||
void b3StridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleIndexCallback* callback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
|
||||
void b3StridingMeshInterface::InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
|
||||
{
|
||||
(void)aabbMin;
|
||||
(void)aabbMax;
|
||||
@@ -104,7 +104,7 @@ void b3StridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleInde
|
||||
break;
|
||||
}
|
||||
default:
|
||||
btAssert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT));
|
||||
b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -161,12 +161,12 @@ void b3StridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleInde
|
||||
break;
|
||||
}
|
||||
default:
|
||||
btAssert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT));
|
||||
b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
btAssert((type == PHY_FLOAT) || (type == PHY_DOUBLE));
|
||||
b3Assert((type == PHY_FLOAT) || (type == PHY_DOUBLE));
|
||||
}
|
||||
|
||||
unLockReadOnlyVertexBase(part);
|
||||
@@ -176,15 +176,15 @@ void b3StridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleInde
|
||||
void b3StridingMeshInterface::calculateAabbBruteForce(b3Vector3& aabbMin,b3Vector3& aabbMax)
|
||||
{
|
||||
|
||||
struct AabbCalculationCallback : public btInternalTriangleIndexCallback
|
||||
struct AabbCalculationCallback : public b3InternalTriangleIndexCallback
|
||||
{
|
||||
b3Vector3 m_aabbMin;
|
||||
b3Vector3 m_aabbMax;
|
||||
|
||||
AabbCalculationCallback()
|
||||
{
|
||||
m_aabbMin.setValue(b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT));
|
||||
m_aabbMax.setValue(b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT));
|
||||
m_aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT));
|
||||
m_aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT));
|
||||
}
|
||||
|
||||
virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex)
|
||||
@@ -203,8 +203,8 @@ void b3StridingMeshInterface::calculateAabbBruteForce(b3Vector3& aabbMin,b3Vecto
|
||||
|
||||
//first calculate the total aabb for all triangles
|
||||
AabbCalculationCallback aabbCallback;
|
||||
aabbMin.setValue(b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT),b3Scalar(-BT_LARGE_FLOAT));
|
||||
aabbMax.setValue(b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT),b3Scalar(BT_LARGE_FLOAT));
|
||||
aabbMin.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT));
|
||||
aabbMax.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT));
|
||||
InternalProcessAllTriangles(&aabbCallback,aabbMin,aabbMax);
|
||||
|
||||
aabbMin = aabbCallback.m_aabbMin;
|
||||
|
||||
@@ -13,12 +13,12 @@ subject to the following restrictions:
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef BT_STRIDING_MESHINTERFACE_H
|
||||
#define BT_STRIDING_MESHINTERFACE_H
|
||||
#ifndef B3_STRIDING_MESHINTERFACE_H
|
||||
#define B3_STRIDING_MESHINTERFACE_H
|
||||
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
#include "b3TriangleCallback.h"
|
||||
//#include "btConcaveShape.h"
|
||||
//#include "b3ConcaveShape.h"
|
||||
|
||||
|
||||
enum PHY_ScalarType {
|
||||
@@ -27,7 +27,7 @@ enum PHY_ScalarType {
|
||||
};
|
||||
|
||||
|
||||
/// The b3StridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with btBvhTriangleMeshShape and some other collision shapes.
|
||||
/// The b3StridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with b3BvhTriangleMeshShape and some other collision shapes.
|
||||
/// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips.
|
||||
/// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory.
|
||||
ATTRIBUTE_ALIGNED16(class ) b3StridingMeshInterface
|
||||
@@ -37,7 +37,7 @@ ATTRIBUTE_ALIGNED16(class ) b3StridingMeshInterface
|
||||
b3Vector3 m_scaling;
|
||||
|
||||
public:
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
b3StridingMeshInterface() :m_scaling(b3Scalar(1.),b3Scalar(1.),b3Scalar(1.))
|
||||
{
|
||||
@@ -48,7 +48,7 @@ ATTRIBUTE_ALIGNED16(class ) b3StridingMeshInterface
|
||||
|
||||
|
||||
|
||||
virtual void InternalProcessAllTriangles(btInternalTriangleIndexCallback* callback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const;
|
||||
virtual void InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const;
|
||||
|
||||
///brute force method to calculate aabb
|
||||
void calculateAabbBruteForce(b3Vector3& aabbMin,b3Vector3& aabbMax);
|
||||
@@ -99,29 +99,29 @@ ATTRIBUTE_ALIGNED16(class ) b3StridingMeshInterface
|
||||
virtual int calculateSerializeBufferSize() const;
|
||||
|
||||
///fills the dataBuffer and returns the struct name (and 0 on failure)
|
||||
//virtual const char* serialize(void* dataBuffer, btSerializer* serializer) const;
|
||||
//virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct btIntIndexData
|
||||
struct b3IntIndexData
|
||||
{
|
||||
int m_value;
|
||||
};
|
||||
|
||||
struct btShortIntIndexData
|
||||
struct b3ShortIntIndexData
|
||||
{
|
||||
short m_value;
|
||||
char m_pad[2];
|
||||
};
|
||||
|
||||
struct btShortIntIndexTripletData
|
||||
struct b3ShortIntIndexTripletData
|
||||
{
|
||||
short m_values[3];
|
||||
char m_pad[2];
|
||||
};
|
||||
|
||||
struct btCharIndexTripletData
|
||||
struct b3CharIndexTripletData
|
||||
{
|
||||
unsigned char m_values[3];
|
||||
char m_pad;
|
||||
@@ -129,16 +129,16 @@ struct btCharIndexTripletData
|
||||
|
||||
|
||||
///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
|
||||
struct btMeshPartData
|
||||
struct b3MeshPartData
|
||||
{
|
||||
btVector3FloatData *m_vertices3f;
|
||||
btVector3DoubleData *m_vertices3d;
|
||||
b3Vector3FloatData *m_vertices3f;
|
||||
b3Vector3DoubleData *m_vertices3d;
|
||||
|
||||
btIntIndexData *m_indices32;
|
||||
btShortIntIndexTripletData *m_3indices16;
|
||||
btCharIndexTripletData *m_3indices8;
|
||||
b3IntIndexData *m_indices32;
|
||||
b3ShortIntIndexTripletData *m_3indices16;
|
||||
b3CharIndexTripletData *m_3indices8;
|
||||
|
||||
btShortIntIndexData *m_indices16;//backwards compatibility
|
||||
b3ShortIntIndexData *m_indices16;//backwards compatibility
|
||||
|
||||
int m_numTriangles;//length of m_indices = m_numTriangles
|
||||
int m_numVertices;
|
||||
@@ -146,10 +146,10 @@ struct btMeshPartData
|
||||
|
||||
|
||||
///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
|
||||
struct btStridingMeshInterfaceData
|
||||
struct b3StridingMeshInterfaceData
|
||||
{
|
||||
btMeshPartData *m_meshPartsPtr;
|
||||
btVector3FloatData m_scaling;
|
||||
b3MeshPartData *m_meshPartsPtr;
|
||||
b3Vector3FloatData m_scaling;
|
||||
int m_numMeshParts;
|
||||
char m_padding[4];
|
||||
};
|
||||
@@ -159,9 +159,9 @@ struct btStridingMeshInterfaceData
|
||||
|
||||
SIMD_FORCE_INLINE int b3StridingMeshInterface::calculateSerializeBufferSize() const
|
||||
{
|
||||
return sizeof(btStridingMeshInterfaceData);
|
||||
return sizeof(b3StridingMeshInterfaceData);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif //BT_STRIDING_MESHINTERFACE_H
|
||||
#endif //B3_STRIDING_MESHINTERFACE_H
|
||||
|
||||
@@ -21,7 +21,7 @@ b3TriangleCallback::~b3TriangleCallback()
|
||||
}
|
||||
|
||||
|
||||
btInternalTriangleIndexCallback::~btInternalTriangleIndexCallback()
|
||||
b3InternalTriangleIndexCallback::~b3InternalTriangleIndexCallback()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
@@ -13,14 +13,14 @@ subject to the following restrictions:
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef BT_TRIANGLE_CALLBACK_H
|
||||
#define BT_TRIANGLE_CALLBACK_H
|
||||
#ifndef B3_TRIANGLE_CALLBACK_H
|
||||
#define B3_TRIANGLE_CALLBACK_H
|
||||
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
|
||||
|
||||
///The b3TriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles.
|
||||
///This callback is called by processAllTriangles for all btConcaveShape derived class, such as btBvhTriangleMeshShape, btStaticPlaneShape and btHeightfieldTerrainShape.
|
||||
///This callback is called by processAllTriangles for all b3ConcaveShape derived class, such as b3BvhTriangleMeshShape, b3StaticPlaneShape and b3HeightfieldTerrainShape.
|
||||
class b3TriangleCallback
|
||||
{
|
||||
public:
|
||||
@@ -29,14 +29,14 @@ public:
|
||||
virtual void processTriangle(b3Vector3* triangle, int partId, int triangleIndex) = 0;
|
||||
};
|
||||
|
||||
class btInternalTriangleIndexCallback
|
||||
class b3InternalTriangleIndexCallback
|
||||
{
|
||||
public:
|
||||
|
||||
virtual ~btInternalTriangleIndexCallback();
|
||||
virtual ~b3InternalTriangleIndexCallback();
|
||||
virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex) = 0;
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif //BT_TRIANGLE_CALLBACK_H
|
||||
#endif //B3_TRIANGLE_CALLBACK_H
|
||||
|
||||
@@ -18,7 +18,7 @@ subject to the following restrictions:
|
||||
b3TriangleIndexVertexArray::b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,b3Scalar* vertexBase,int vertexStride)
|
||||
: m_hasAabb(0)
|
||||
{
|
||||
btIndexedMesh mesh;
|
||||
b3IndexedMesh mesh;
|
||||
|
||||
mesh.m_numTriangles = numTriangles;
|
||||
mesh.m_triangleIndexBase = (const unsigned char *)triangleIndexBase;
|
||||
@@ -38,9 +38,9 @@ b3TriangleIndexVertexArray::~b3TriangleIndexVertexArray()
|
||||
|
||||
void b3TriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart)
|
||||
{
|
||||
btAssert(subpart< getNumSubParts() );
|
||||
b3Assert(subpart< getNumSubParts() );
|
||||
|
||||
btIndexedMesh& mesh = m_indexedMeshes[subpart];
|
||||
b3IndexedMesh& mesh = m_indexedMeshes[subpart];
|
||||
|
||||
numverts = mesh.m_numVertices;
|
||||
(*vertexbase) = (unsigned char *) mesh.m_vertexBase;
|
||||
@@ -58,7 +58,7 @@ void b3TriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertex
|
||||
|
||||
void b3TriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) const
|
||||
{
|
||||
const btIndexedMesh& mesh = m_indexedMeshes[subpart];
|
||||
const b3IndexedMesh& mesh = m_indexedMeshes[subpart];
|
||||
|
||||
numverts = mesh.m_numVertices;
|
||||
(*vertexbase) = (const unsigned char *)mesh.m_vertexBase;
|
||||
|
||||
@@ -13,19 +13,19 @@ subject to the following restrictions:
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef BT_TRIANGLE_INDEX_VERTEX_ARRAY_H
|
||||
#define BT_TRIANGLE_INDEX_VERTEX_ARRAY_H
|
||||
#ifndef B3_TRIANGLE_INDEX_VERTEX_ARRAY_H
|
||||
#define B3_TRIANGLE_INDEX_VERTEX_ARRAY_H
|
||||
|
||||
#include "b3StridingMeshInterface.h"
|
||||
#include "Bullet3Common/b3AlignedObjectArray.h"
|
||||
#include "Bullet3Common/b3Scalar.h"
|
||||
|
||||
|
||||
///The btIndexedMesh indexes a single vertex and index array. Multiple btIndexedMesh objects can be passed into a b3TriangleIndexVertexArray using addIndexedMesh.
|
||||
///The b3IndexedMesh indexes a single vertex and index array. Multiple b3IndexedMesh objects can be passed into a b3TriangleIndexVertexArray using addIndexedMesh.
|
||||
///Instead of the number of indices, we pass the number of triangles.
|
||||
ATTRIBUTE_ALIGNED16( struct) btIndexedMesh
|
||||
ATTRIBUTE_ALIGNED16( struct) b3IndexedMesh
|
||||
{
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
int m_numTriangles;
|
||||
const unsigned char * m_triangleIndexBase;
|
||||
@@ -46,20 +46,20 @@ ATTRIBUTE_ALIGNED16( struct) btIndexedMesh
|
||||
PHY_ScalarType m_vertexType;
|
||||
|
||||
|
||||
btIndexedMesh()
|
||||
b3IndexedMesh()
|
||||
:m_indexType(PHY_INTEGER),
|
||||
#ifdef BT_USE_DOUBLE_PRECISION
|
||||
#ifdef B3_USE_DOUBLE_PRECISION
|
||||
m_vertexType(PHY_DOUBLE)
|
||||
#else // BT_USE_DOUBLE_PRECISION
|
||||
#else // B3_USE_DOUBLE_PRECISION
|
||||
m_vertexType(PHY_FLOAT)
|
||||
#endif // BT_USE_DOUBLE_PRECISION
|
||||
#endif // B3_USE_DOUBLE_PRECISION
|
||||
{
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
|
||||
typedef b3AlignedObjectArray<btIndexedMesh> IndexedMeshArray;
|
||||
typedef b3AlignedObjectArray<b3IndexedMesh> IndexedMeshArray;
|
||||
|
||||
///The b3TriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays.
|
||||
///Additional meshes can be added using addIndexedMesh
|
||||
@@ -76,7 +76,7 @@ protected:
|
||||
|
||||
public:
|
||||
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
b3TriangleIndexVertexArray() : m_hasAabb(0)
|
||||
{
|
||||
@@ -87,7 +87,7 @@ public:
|
||||
//just to be backwards compatible
|
||||
b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,b3Scalar* vertexBase,int vertexStride);
|
||||
|
||||
void addIndexedMesh(const btIndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER)
|
||||
void addIndexedMesh(const b3IndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER)
|
||||
{
|
||||
m_indexedMeshes.push_back(mesh);
|
||||
m_indexedMeshes[m_indexedMeshes.size()-1].m_indexType = indexType;
|
||||
@@ -130,4 +130,4 @@ public:
|
||||
}
|
||||
;
|
||||
|
||||
#endif //BT_TRIANGLE_INDEX_VERTEX_ARRAY_H
|
||||
#endif //B3_TRIANGLE_INDEX_VERTEX_ARRAY_H
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
|
||||
static const char* bvhTraversalKernelCL= \
|
||||
"//keep this enum in sync with the CPU version (in btCollidable.h)\n"
|
||||
"//keep this enum in sync with the CPU version (in b3Collidable.h)\n"
|
||||
"//written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"#define SHAPE_CONVEX_HULL 3\n"
|
||||
@@ -13,7 +13,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
"\n"
|
||||
"#define MAX_NUM_PARTS_IN_BITS 10\n"
|
||||
"\n"
|
||||
"///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n"
|
||||
"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n"
|
||||
"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
@@ -22,7 +22,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
" unsigned short int m_quantizedAabbMax[3];\n"
|
||||
" //4 bytes\n"
|
||||
" int m_escapeIndexOrTriangleIndex;\n"
|
||||
"} btQuantizedBvhNode;\n"
|
||||
"} b3QuantizedBvhNode;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
@@ -44,12 +44,12 @@ static const char* bvhTraversalKernelCL= \
|
||||
" }\n"
|
||||
" int getEscapeIndex() const\n"
|
||||
" {\n"
|
||||
" btAssert(!isLeafNode());\n"
|
||||
" b3Assert(!isLeafNode());\n"
|
||||
" return -m_escapeIndexOrTriangleIndex;\n"
|
||||
" }\n"
|
||||
" int getTriangleIndex() const\n"
|
||||
" {\n"
|
||||
" btAssert(isLeafNode());\n"
|
||||
" b3Assert(isLeafNode());\n"
|
||||
" unsigned int x=0;\n"
|
||||
" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n"
|
||||
" // Get only the lower bits where the triangle index is stored\n"
|
||||
@@ -57,13 +57,13 @@ static const char* bvhTraversalKernelCL= \
|
||||
" }\n"
|
||||
" int getPartId() const\n"
|
||||
" {\n"
|
||||
" btAssert(isLeafNode());\n"
|
||||
" b3Assert(isLeafNode());\n"
|
||||
" // Get only the highest bits where the part index is stored\n"
|
||||
" return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));\n"
|
||||
" }\n"
|
||||
"*/\n"
|
||||
"\n"
|
||||
"int getTriangleIndex(const btQuantizedBvhNode* rootNode)\n"
|
||||
"int getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n"
|
||||
"{\n"
|
||||
" unsigned int x=0;\n"
|
||||
" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n"
|
||||
@@ -71,13 +71,13 @@ static const char* bvhTraversalKernelCL= \
|
||||
" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"int isLeaf(const btQuantizedBvhNode* rootNode)\n"
|
||||
"int isLeaf(const b3QuantizedBvhNode* rootNode)\n"
|
||||
"{\n"
|
||||
" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n"
|
||||
" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n"
|
||||
"}\n"
|
||||
" \n"
|
||||
"int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n"
|
||||
"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n"
|
||||
"{\n"
|
||||
" return -rootNode->m_escapeIndexOrTriangleIndex;\n"
|
||||
"}\n"
|
||||
@@ -92,9 +92,9 @@ static const char* bvhTraversalKernelCL= \
|
||||
" //4 bytes\n"
|
||||
" int m_subtreeSize;\n"
|
||||
" int m_padding[3];\n"
|
||||
"} btBvhSubtreeInfo;\n"
|
||||
"} b3BvhSubtreeInfo;\n"
|
||||
"\n"
|
||||
"///keep this in sync with btCollidable.h\n"
|
||||
"///keep this in sync with b3Collidable.h\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_numChildShapes;\n"
|
||||
@@ -102,7 +102,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
" int m_shapeType;\n"
|
||||
" int m_shapeIndex;\n"
|
||||
" \n"
|
||||
"} btCollidableGpu;\n"
|
||||
"} b3CollidableGpu;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
@@ -112,7 +112,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
" int m_unused0;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"} btGpuChildShape;\n"
|
||||
"} b3GpuChildShape;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
@@ -142,7 +142,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
" float m_maxElems[4];\n"
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} btAabbCL;\n"
|
||||
"} b3AabbCL;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"int testQuantizedAabbAgainstQuantizedAabb(\n"
|
||||
@@ -196,12 +196,12 @@ static const char* bvhTraversalKernelCL= \
|
||||
"// work-in-progress\n"
|
||||
"__kernel void bvhTraversalKernel( __global const int2* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global btAabbCL* aabbs,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global b3AabbCL* aabbs,\n"
|
||||
" __global int4* concavePairsOut,\n"
|
||||
" __global volatile int* numConcavePairsOut,\n"
|
||||
" __global const btBvhSubtreeInfo* subtreeHeadersRoot,\n"
|
||||
" __global const btQuantizedBvhNode* quantizedNodesRoot,\n"
|
||||
" __global const b3BvhSubtreeInfo* subtreeHeadersRoot,\n"
|
||||
" __global const b3QuantizedBvhNode* quantizedNodesRoot,\n"
|
||||
" __global const b3BvhInfo* bvhInfos,\n"
|
||||
" int numPairs,\n"
|
||||
" int maxNumConcavePairsCapacity)\n"
|
||||
@@ -238,8 +238,8 @@ static const char* bvhTraversalKernelCL= \
|
||||
" float4 bvhAabbMax = bvhInfo.m_aabbMax;\n"
|
||||
" float4 bvhQuantization = bvhInfo.m_quantization;\n"
|
||||
" int numSubtreeHeaders = bvhInfo.m_numSubTrees;\n"
|
||||
" __global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n"
|
||||
" __global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n"
|
||||
" __global const b3BvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n"
|
||||
" __global const b3QuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" unsigned short int quantizedQueryAabbMin[3];\n"
|
||||
@@ -249,7 +249,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
" \n"
|
||||
" for (int i=0;i<numSubtreeHeaders;i++)\n"
|
||||
" {\n"
|
||||
" btBvhSubtreeInfo subtree = subtreeHeaders[i];\n"
|
||||
" b3BvhSubtreeInfo subtree = subtreeHeaders[i];\n"
|
||||
" \n"
|
||||
" int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);\n"
|
||||
" if (overlap != 0)\n"
|
||||
@@ -262,7 +262,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
" int aabbOverlap;\n"
|
||||
" while (curIndex < endNodeIndex)\n"
|
||||
" {\n"
|
||||
" btQuantizedBvhNode rootNode = quantizedNodes[curIndex];\n"
|
||||
" b3QuantizedBvhNode rootNode = quantizedNodes[curIndex];\n"
|
||||
" aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);\n"
|
||||
" isLeafNode = isLeaf(&rootNode);\n"
|
||||
" if (aabbOverlap)\n"
|
||||
|
||||
@@ -67,9 +67,9 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float m_maxElems[4];\n"
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} btAabbCL;\n"
|
||||
"} b3AabbCL;\n"
|
||||
"\n"
|
||||
"///keep this in sync with btCollidable.h\n"
|
||||
"///keep this in sync with b3Collidable.h\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_numChildShapes;\n"
|
||||
@@ -77,7 +77,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int m_shapeType;\n"
|
||||
" int m_shapeIndex;\n"
|
||||
" \n"
|
||||
"} btCollidableGpu;\n"
|
||||
"} b3CollidableGpu;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
@@ -87,7 +87,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int m_unused0;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"} btGpuChildShape;\n"
|
||||
"} b3GpuChildShape;\n"
|
||||
"\n"
|
||||
"#define GET_NPOINTS(x) (x).m_worldNormal.w\n"
|
||||
"\n"
|
||||
@@ -129,7 +129,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float4 m_plane;\n"
|
||||
" int m_indexOffset;\n"
|
||||
" int m_numIndices;\n"
|
||||
"} btGpuFace;\n"
|
||||
"} b3GpuFace;\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
@@ -290,7 +290,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"inline bool IsPointInPolygon(float4 p, \n"
|
||||
" const btGpuFace* face,\n"
|
||||
" const b3GpuFace* face,\n"
|
||||
" __global const float4* baseVertex,\n"
|
||||
" __global const int* convexIndices,\n"
|
||||
" float4* out)\n"
|
||||
@@ -352,11 +352,11 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int bodyIndexA, int bodyIndexB, \n"
|
||||
" int collidableIndexA, int collidableIndexB, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes,\n"
|
||||
" __global const float4* convexVertices,\n"
|
||||
" __global const int* convexIndices,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global Contact4* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int maxContactCapacity,\n"
|
||||
@@ -383,7 +383,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
"\n"
|
||||
" for ( int f = 0; f < numFaces; f++ )\n"
|
||||
" {\n"
|
||||
" btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n"
|
||||
" b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n"
|
||||
"\n"
|
||||
" // set up a plane equation \n"
|
||||
" float4 planeEqn;\n"
|
||||
@@ -594,11 +594,11 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int bodyIndexA, int bodyIndexB, \n"
|
||||
" int collidableIndexA, int collidableIndexB, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu*collidables,\n"
|
||||
" __global const b3CollidableGpu*collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes,\n"
|
||||
" __global const float4* convexVertices,\n"
|
||||
" __global const int* convexIndices,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global Contact4* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int maxContactCapacity,\n"
|
||||
@@ -733,8 +733,8 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int bodyIndexA, int bodyIndexB, \n"
|
||||
" int collidableIndexA, int collidableIndexB, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global Contact4* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int maxContactCapacity)\n"
|
||||
@@ -793,11 +793,11 @@ static const char* primitiveContactsKernelsCL= \
|
||||
"\n"
|
||||
"__kernel void primitiveContactsKernel( __global const int2* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global Contact4* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
@@ -972,14 +972,14 @@ static const char* primitiveContactsKernelsCL= \
|
||||
"// work-in-progress\n"
|
||||
"__kernel void processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global btAabbCL* aabbs,\n"
|
||||
" __global const btGpuChildShape* gpuChildShapes,\n"
|
||||
" __global b3AabbCL* aabbs,\n"
|
||||
" __global const b3GpuChildShape* gpuChildShapes,\n"
|
||||
" __global Contact4* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int numCompoundPairs, int maxContactCapacity\n"
|
||||
@@ -1157,7 +1157,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int bodyIndexA, int bodyIndexB,\n"
|
||||
" int collidableIndexA, int collidableIndexB, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" const float4* triangleVertices,\n"
|
||||
" __global Contact4* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
@@ -1299,13 +1299,13 @@ static const char* primitiveContactsKernelsCL= \
|
||||
"// work-in-progress\n"
|
||||
"__kernel void findConcaveSphereContactsKernel( __global int4* concavePairs,\n"
|
||||
" __global const BodyData* rigidBodies,\n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global btAabbCL* aabbs,\n"
|
||||
" __global b3AabbCL* aabbs,\n"
|
||||
" __global Contact4* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int numConcavePairs, int maxContactCapacity\n"
|
||||
@@ -1329,7 +1329,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n"
|
||||
" {\n"
|
||||
" int f = concavePairs[i].z;\n"
|
||||
" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
|
||||
" b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
|
||||
" \n"
|
||||
" float4 verticesA[3];\n"
|
||||
" for (int i=0;i<3;i++)\n"
|
||||
|
||||
@@ -55,7 +55,7 @@ static const char* satClipKernelsCL= \
|
||||
"} Contact4;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///keep this in sync with btCollidable.h\n"
|
||||
"///keep this in sync with b3Collidable.h\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_numChildShapes;\n"
|
||||
@@ -63,7 +63,7 @@ static const char* satClipKernelsCL= \
|
||||
" int m_shapeType;\n"
|
||||
" int m_shapeIndex;\n"
|
||||
" \n"
|
||||
"} btCollidableGpu;\n"
|
||||
"} b3CollidableGpu;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
@@ -73,7 +73,7 @@ static const char* satClipKernelsCL= \
|
||||
" int m_unused0;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"} btGpuChildShape;\n"
|
||||
"} b3GpuChildShape;\n"
|
||||
"\n"
|
||||
"#define GET_NPOINTS(x) (x).m_worldNormal.w\n"
|
||||
"\n"
|
||||
@@ -115,7 +115,7 @@ static const char* satClipKernelsCL= \
|
||||
" float4 m_plane;\n"
|
||||
" int m_indexOffset;\n"
|
||||
" int m_numIndices;\n"
|
||||
"} btGpuFace;\n"
|
||||
"} b3GpuFace;\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
@@ -357,7 +357,7 @@ static const char* satClipKernelsCL= \
|
||||
" float4* worldVertsB2, int capacityWorldVertsB2,\n"
|
||||
" const float minDist, float maxDist,\n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" float4* contactsOut,\n"
|
||||
" int contactCapacity)\n"
|
||||
@@ -392,7 +392,7 @@ static const char* satClipKernelsCL= \
|
||||
" if (closestFaceA<0)\n"
|
||||
" return numContactsOut;\n"
|
||||
"\n"
|
||||
" btGpuFace polyA = faces[hullA->m_faceOffset+closestFaceA];\n"
|
||||
" b3GpuFace polyA = faces[hullA->m_faceOffset+closestFaceA];\n"
|
||||
"\n"
|
||||
" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
|
||||
" int numVerticesA = polyA.m_numIndices;\n"
|
||||
@@ -416,7 +416,7 @@ static const char* satClipKernelsCL= \
|
||||
" //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n"
|
||||
" numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n"
|
||||
"\n"
|
||||
" //btSwap(pVtxIn,pVtxOut);\n"
|
||||
" //b3Swap(pVtxIn,pVtxOut);\n"
|
||||
" float4* tmp = pVtxOut;\n"
|
||||
" pVtxOut = pVtxIn;\n"
|
||||
" pVtxIn = tmp;\n"
|
||||
@@ -458,10 +458,10 @@ static const char* satClipKernelsCL= \
|
||||
" float4* worldVertsB2, int capacityWorldVertsB2,\n"
|
||||
" const float minDist, float maxDist,\n"
|
||||
" const float4* verticesA,\n"
|
||||
" const btGpuFace* facesA,\n"
|
||||
" const b3GpuFace* facesA,\n"
|
||||
" const int* indicesA,\n"
|
||||
" __global const float4* verticesB,\n"
|
||||
" __global const btGpuFace* facesB,\n"
|
||||
" __global const b3GpuFace* facesB,\n"
|
||||
" __global const int* indicesB,\n"
|
||||
" float4* contactsOut,\n"
|
||||
" int contactCapacity)\n"
|
||||
@@ -496,7 +496,7 @@ static const char* satClipKernelsCL= \
|
||||
" if (closestFaceA<0)\n"
|
||||
" return numContactsOut;\n"
|
||||
"\n"
|
||||
" btGpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA];\n"
|
||||
" b3GpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA];\n"
|
||||
"\n"
|
||||
" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
|
||||
" int numVerticesA = polyA.m_numIndices;\n"
|
||||
@@ -520,7 +520,7 @@ static const char* satClipKernelsCL= \
|
||||
" //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n"
|
||||
" numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n"
|
||||
"\n"
|
||||
" //btSwap(pVtxIn,pVtxOut);\n"
|
||||
" //b3Swap(pVtxIn,pVtxOut);\n"
|
||||
" float4* tmp = pVtxOut;\n"
|
||||
" pVtxOut = pVtxIn;\n"
|
||||
" pVtxIn = tmp;\n"
|
||||
@@ -561,7 +561,7 @@ static const char* satClipKernelsCL= \
|
||||
" float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n"
|
||||
" const float minDist, float maxDist,\n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" float4* localContactsOut,\n"
|
||||
" int localContactCapacity)\n"
|
||||
@@ -589,7 +589,7 @@ static const char* satClipKernelsCL= \
|
||||
" }\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" const btGpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
|
||||
" const b3GpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
|
||||
" const int numVertices = polyB.m_numIndices;\n"
|
||||
" for(int e0=0;e0<numVertices;e0++)\n"
|
||||
" {\n"
|
||||
@@ -617,10 +617,10 @@ static const char* satClipKernelsCL= \
|
||||
" float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n"
|
||||
" const float minDist, float maxDist,\n"
|
||||
" const float4* verticesA,\n"
|
||||
" const btGpuFace* facesA,\n"
|
||||
" const b3GpuFace* facesA,\n"
|
||||
" const int* indicesA,\n"
|
||||
" __global const float4* verticesB,\n"
|
||||
" __global const btGpuFace* facesB,\n"
|
||||
" __global const b3GpuFace* facesB,\n"
|
||||
" __global const int* indicesB,\n"
|
||||
" float4* localContactsOut,\n"
|
||||
" int localContactCapacity)\n"
|
||||
@@ -648,7 +648,7 @@ static const char* satClipKernelsCL= \
|
||||
" }\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n"
|
||||
" const b3GpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n"
|
||||
" const int numVertices = polyB.m_numIndices;\n"
|
||||
" for(int e0=0;e0<numVertices;e0++)\n"
|
||||
" {\n"
|
||||
@@ -956,11 +956,11 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"__kernel void clipHullHullKernel( __global const int2* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global const float4* separatingNormals,\n"
|
||||
" __global const int* hasSeparatingAxis,\n"
|
||||
@@ -1053,13 +1053,13 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"__kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global const btGpuChildShape* gpuChildShapes,\n"
|
||||
" __global const b3GpuChildShape* gpuChildShapes,\n"
|
||||
" __global const float4* gpuCompoundSepNormalsOut,\n"
|
||||
" __global const int* gpuHasCompoundSepNormalsOut,\n"
|
||||
" __global Contact4* restrict globalContactsOut,\n"
|
||||
@@ -1185,7 +1185,7 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"__kernel void sphereSphereCollisionKernel( __global const int2* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const float4* separatingNormals,\n"
|
||||
" __global const int* hasSeparatingAxis,\n"
|
||||
" __global Contact4* restrict globalContactsOut,\n"
|
||||
@@ -1252,13 +1252,13 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"__kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global const btGpuChildShape* gpuChildShapes,\n"
|
||||
" __global const b3GpuChildShape* gpuChildShapes,\n"
|
||||
" __global const float4* separatingNormals,\n"
|
||||
" __global Contact4* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
@@ -1306,7 +1306,7 @@ static const char* satClipKernelsCL= \
|
||||
" convexPolyhedronA.m_vertexOffset = 0;\n"
|
||||
" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n"
|
||||
"\n"
|
||||
" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
|
||||
" b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
|
||||
" \n"
|
||||
" float4 verticesA[3];\n"
|
||||
" for (int i=0;i<3;i++)\n"
|
||||
@@ -1335,7 +1335,7 @@ static const char* satClipKernelsCL= \
|
||||
" \n"
|
||||
" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n"
|
||||
" \n"
|
||||
" btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
|
||||
" b3GpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
|
||||
" int indicesA[3+3+2+2+2];\n"
|
||||
" int curUsedIndices=0;\n"
|
||||
" int fidx=0;\n"
|
||||
@@ -1496,7 +1496,7 @@ static const char* satClipKernelsCL= \
|
||||
" int capacityWorldVerts,\n"
|
||||
" const float minDist, float maxDist,\n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global int4* clippingFaces, int pairIndex)\n"
|
||||
"{\n"
|
||||
@@ -1523,7 +1523,7 @@ static const char* satClipKernelsCL= \
|
||||
" }\n"
|
||||
" \n"
|
||||
" {\n"
|
||||
" const btGpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
|
||||
" const b3GpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
|
||||
" const int numVertices = polyB.m_numIndices;\n"
|
||||
" for(int e0=0;e0<numVertices;e0++)\n"
|
||||
" {\n"
|
||||
@@ -1664,11 +1664,11 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"__kernel void findClippingFacesKernel( __global const int2* pairs,\n"
|
||||
" __global const BodyData* rigidBodies,\n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes,\n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global const float4* separatingNormals,\n"
|
||||
" __global const int* hasSeparatingAxis,\n"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
|
||||
static const char* satKernelsCL= \
|
||||
"//keep this enum in sync with the CPU version (in btCollidable.h)\n"
|
||||
"//keep this enum in sync with the CPU version (in b3Collidable.h)\n"
|
||||
"//written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"\n"
|
||||
@@ -13,7 +13,7 @@ static const char* satKernelsCL= \
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"\n"
|
||||
"///keep this in sync with btCollidable.h\n"
|
||||
"///keep this in sync with b3Collidable.h\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_numChildShapes;\n"
|
||||
@@ -21,7 +21,7 @@ static const char* satKernelsCL= \
|
||||
" int m_shapeType;\n"
|
||||
" int m_shapeIndex;\n"
|
||||
" \n"
|
||||
"} btCollidableGpu;\n"
|
||||
"} b3CollidableGpu;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
@@ -31,7 +31,7 @@ static const char* satKernelsCL= \
|
||||
" int m_unused0;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"} btGpuChildShape;\n"
|
||||
"} b3GpuChildShape;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
@@ -80,14 +80,14 @@ static const char* satKernelsCL= \
|
||||
" float m_maxElems[4];\n"
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} btAabbCL;\n"
|
||||
"} b3AabbCL;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_plane;\n"
|
||||
" int m_indexOffset;\n"
|
||||
" int m_numIndices;\n"
|
||||
"} btGpuFace;\n"
|
||||
"} b3GpuFace;\n"
|
||||
"\n"
|
||||
"#define make_float4 (float4)\n"
|
||||
"\n"
|
||||
@@ -296,12 +296,12 @@ static const char* satKernelsCL= \
|
||||
" \n"
|
||||
" const float4* verticesA, \n"
|
||||
" const float4* uniqueEdgesA, \n"
|
||||
" const btGpuFace* facesA,\n"
|
||||
" const b3GpuFace* facesA,\n"
|
||||
" const int* indicesA,\n"
|
||||
"\n"
|
||||
" __global const float4* verticesB, \n"
|
||||
" __global const float4* uniqueEdgesB, \n"
|
||||
" __global const btGpuFace* facesB,\n"
|
||||
" __global const b3GpuFace* facesB,\n"
|
||||
" __global const int* indicesB,\n"
|
||||
" float4* sep,\n"
|
||||
" float* dmin)\n"
|
||||
@@ -348,11 +348,11 @@ static const char* satKernelsCL= \
|
||||
" const float4 DeltaC2,\n"
|
||||
" __global const float4* verticesA, \n"
|
||||
" __global const float4* uniqueEdgesA, \n"
|
||||
" __global const btGpuFace* facesA,\n"
|
||||
" __global const b3GpuFace* facesA,\n"
|
||||
" __global const int* indicesA,\n"
|
||||
" const float4* verticesB,\n"
|
||||
" const float4* uniqueEdgesB, \n"
|
||||
" const btGpuFace* facesB,\n"
|
||||
" const b3GpuFace* facesB,\n"
|
||||
" const int* indicesB,\n"
|
||||
" float4* sep,\n"
|
||||
" float* dmin)\n"
|
||||
@@ -401,11 +401,11 @@ static const char* satKernelsCL= \
|
||||
" const float4 DeltaC2,\n"
|
||||
" const float4* verticesA, \n"
|
||||
" const float4* uniqueEdgesA, \n"
|
||||
" const btGpuFace* facesA,\n"
|
||||
" const b3GpuFace* facesA,\n"
|
||||
" const int* indicesA,\n"
|
||||
" __global const float4* verticesB, \n"
|
||||
" __global const float4* uniqueEdgesB, \n"
|
||||
" __global const btGpuFace* facesB,\n"
|
||||
" __global const b3GpuFace* facesB,\n"
|
||||
" __global const int* indicesB,\n"
|
||||
" float4* sep,\n"
|
||||
" float* dmin)\n"
|
||||
@@ -507,7 +507,7 @@ static const char* satKernelsCL= \
|
||||
" const float4 DeltaC2,\n"
|
||||
" __global const float4* vertices, \n"
|
||||
" __global const float4* uniqueEdges, \n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" float4* sep,\n"
|
||||
" float* dmin)\n"
|
||||
@@ -566,7 +566,7 @@ static const char* satKernelsCL= \
|
||||
" const float4 DeltaC2,\n"
|
||||
" __global const float4* vertices, \n"
|
||||
" __global const float4* uniqueEdges, \n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" float4* sep,\n"
|
||||
" float* dmin)\n"
|
||||
@@ -643,14 +643,14 @@ static const char* satKernelsCL= \
|
||||
"// work-in-progress\n"
|
||||
"__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global btAabbCL* aabbs,\n"
|
||||
" __global const btGpuChildShape* gpuChildShapes,\n"
|
||||
" __global b3AabbCL* aabbs,\n"
|
||||
" __global const b3GpuChildShape* gpuChildShapes,\n"
|
||||
" __global volatile float4* gpuCompoundSepNormalsOut,\n"
|
||||
" __global volatile int* gpuHasCompoundSepNormalsOut,\n"
|
||||
" int numCompoundPairs\n"
|
||||
@@ -760,14 +760,14 @@ static const char* satKernelsCL= \
|
||||
"// work-in-progress\n"
|
||||
"__kernel void findCompoundPairsKernel( __global const int2* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global btAabbCL* aabbs,\n"
|
||||
" __global const btGpuChildShape* gpuChildShapes,\n"
|
||||
" __global b3AabbCL* aabbs,\n"
|
||||
" __global const b3GpuChildShape* gpuChildShapes,\n"
|
||||
" __global volatile int4* gpuCompoundPairsOut,\n"
|
||||
" __global volatile int* numCompoundPairsOut,\n"
|
||||
" int numPairs,\n"
|
||||
@@ -942,13 +942,13 @@ static const char* satKernelsCL= \
|
||||
"// work-in-progress\n"
|
||||
"__kernel void findSeparatingAxisKernel( __global const int2* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global btAabbCL* aabbs,\n"
|
||||
" __global b3AabbCL* aabbs,\n"
|
||||
" __global volatile float4* separatingNormals,\n"
|
||||
" __global volatile int* hasSeparatingAxis,\n"
|
||||
" int numPairs\n"
|
||||
@@ -1056,14 +1056,14 @@ static const char* satKernelsCL= \
|
||||
"// work-in-progress\n"
|
||||
"__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n"
|
||||
" __global const BodyData* rigidBodies,\n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const b3CollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
" __global const float4* vertices,\n"
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global const btGpuChildShape* gpuChildShapes,\n"
|
||||
" __global btAabbCL* aabbs,\n"
|
||||
" __global const b3GpuChildShape* gpuChildShapes,\n"
|
||||
" __global b3AabbCL* aabbs,\n"
|
||||
" __global float4* concaveSeparatingNormalsOut,\n"
|
||||
" int numConcavePairs\n"
|
||||
" )\n"
|
||||
@@ -1106,9 +1106,9 @@ static const char* satKernelsCL= \
|
||||
" convexPolyhedronA.m_vertexOffset = 0;\n"
|
||||
" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n"
|
||||
"\n"
|
||||
" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
|
||||
" b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
|
||||
" float4 triMinAabb, triMaxAabb;\n"
|
||||
" btAabbCL triAabb;\n"
|
||||
" b3AabbCL triAabb;\n"
|
||||
" triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n"
|
||||
" triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n"
|
||||
" \n"
|
||||
@@ -1153,7 +1153,7 @@ static const char* satKernelsCL= \
|
||||
" \n"
|
||||
" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n"
|
||||
" \n"
|
||||
" btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
|
||||
" b3GpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
|
||||
" int indicesA[3+3+2+2+2];\n"
|
||||
" int curUsedIndices=0;\n"
|
||||
" int fidx=0;\n"
|
||||
|
||||
@@ -18,10 +18,10 @@ subject to the following restrictions:
|
||||
#include "../host/b3ConvexHullContact.h"
|
||||
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
#include "parallel_primitives/host/btFillCL.h"
|
||||
#include "parallel_primitives/host/btBoundSearchCL.h"
|
||||
#include "parallel_primitives/host/btRadixSort32CL.h"
|
||||
#include "parallel_primitives/host/btPrefixScanCL.h"
|
||||
#include "parallel_primitives/host/b3FillCL.h"
|
||||
#include "parallel_primitives/host/b3BoundSearchCL.h"
|
||||
#include "parallel_primitives/host/b3RadixSort32CL.h"
|
||||
#include "parallel_primitives/host/b3PrefixScanCL.h"
|
||||
#include "Bullet3Common/b3CommandLineArgs.h"
|
||||
#include "../host/b3ConvexHullContact.h"
|
||||
|
||||
@@ -54,7 +54,7 @@ void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
|
||||
int numDev = b3OpenCLUtils::getNumDevices(g_context);
|
||||
if (numDev>0)
|
||||
{
|
||||
btOpenCLDeviceInfo info;
|
||||
b3OpenCLDeviceInfo info;
|
||||
g_device= b3OpenCLUtils::getDevice(g_context,0);
|
||||
g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BT_CONFIG_H
|
||||
#define BT_CONFIG_H
|
||||
#ifndef B3_CONFIG_H
|
||||
#define B3_CONFIG_H
|
||||
|
||||
struct b3Config
|
||||
{
|
||||
@@ -36,5 +36,5 @@ struct b3Config
|
||||
};
|
||||
|
||||
|
||||
#endif//BT_CONFIG_H
|
||||
#endif//B3_CONFIG_H
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
|
||||
|
||||
#include "b3GpuBatchingPgsSolver.h"
|
||||
#include "../../parallel_primitives/host/btRadixSort32CL.h"
|
||||
#include "../../parallel_primitives/host/b3RadixSort32CL.h"
|
||||
#include "Bullet3Common/b3Quickprof.h"
|
||||
#include "../../parallel_primitives/host/btLauncherCL.h"
|
||||
#include "../../parallel_primitives/host/btBoundSearchCL.h"
|
||||
#include "../../parallel_primitives/host/btPrefixScanCL.h"
|
||||
#include "../../parallel_primitives/host/b3LauncherCL.h"
|
||||
#include "../../parallel_primitives/host/b3BoundSearchCL.h"
|
||||
#include "../../parallel_primitives/host/b3PrefixScanCL.h"
|
||||
#include <string.h>
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "../host/b3Config.h"
|
||||
@@ -30,10 +30,10 @@
|
||||
|
||||
enum
|
||||
{
|
||||
BT_SOLVER_N_SPLIT = 16,
|
||||
BT_SOLVER_N_BATCHES = 4,
|
||||
BT_SOLVER_N_OBJ_PER_SPLIT = 10,
|
||||
BT_SOLVER_N_TASKS_PER_BATCH = BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT,
|
||||
B3_SOLVER_N_SPLIT = 16,
|
||||
B3_SOLVER_N_BATCHES = 4,
|
||||
B3_SOLVER_N_OBJ_PER_SPLIT = 10,
|
||||
B3_SOLVER_N_TASKS_PER_BATCH = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT,
|
||||
};
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ bool gpuBatchContacts = true;//true;
|
||||
bool gpuSolveConstraint = true;//true;
|
||||
|
||||
|
||||
struct btGpuBatchingPgsSolverInternalData
|
||||
struct b3GpuBatchingPgsSolverInternalData
|
||||
{
|
||||
cl_context m_context;
|
||||
cl_device_id m_device;
|
||||
@@ -49,9 +49,9 @@ struct btGpuBatchingPgsSolverInternalData
|
||||
int m_pairCapacity;
|
||||
int m_nIterations;
|
||||
|
||||
btOpenCLArray<b3GpuConstraint4>* m_contactCGPU;
|
||||
btOpenCLArray<unsigned int>* m_numConstraints;
|
||||
btOpenCLArray<unsigned int>* m_offsets;
|
||||
b3OpenCLArray<b3GpuConstraint4>* m_contactCGPU;
|
||||
b3OpenCLArray<unsigned int>* m_numConstraints;
|
||||
b3OpenCLArray<unsigned int>* m_offsets;
|
||||
|
||||
b3Solver* m_solverGPU;
|
||||
|
||||
@@ -64,20 +64,20 @@ struct btGpuBatchingPgsSolverInternalData
|
||||
cl_kernel m_reorderContactKernel;
|
||||
cl_kernel m_copyConstraintKernel;
|
||||
|
||||
class btRadixSort32CL* m_sort32;
|
||||
class btBoundSearchCL* m_search;
|
||||
class btPrefixScanCL* m_scan;
|
||||
class b3RadixSort32CL* m_sort32;
|
||||
class b3BoundSearchCL* m_search;
|
||||
class b3PrefixScanCL* m_scan;
|
||||
|
||||
btOpenCLArray<btSortData>* m_sortDataBuffer;
|
||||
btOpenCLArray<b3Contact4>* m_contactBuffer;
|
||||
b3OpenCLArray<b3SortData>* m_sortDataBuffer;
|
||||
b3OpenCLArray<b3Contact4>* m_contactBuffer;
|
||||
|
||||
btOpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
|
||||
btOpenCLArray<btInertiaCL>* m_inertiaBufferGPU;
|
||||
btOpenCLArray<b3Contact4>* m_pBufContactOutGPU;
|
||||
b3OpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
|
||||
b3OpenCLArray<b3InertiaCL>* m_inertiaBufferGPU;
|
||||
b3OpenCLArray<b3Contact4>* m_pBufContactOutGPU;
|
||||
|
||||
|
||||
b3AlignedObjectArray<unsigned int> m_idxBuffer;
|
||||
b3AlignedObjectArray<btSortData> m_sortData;
|
||||
b3AlignedObjectArray<b3SortData> m_sortData;
|
||||
b3AlignedObjectArray<b3Contact4> m_old;
|
||||
};
|
||||
|
||||
@@ -85,35 +85,35 @@ struct btGpuBatchingPgsSolverInternalData
|
||||
|
||||
b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue q,int pairCapacity)
|
||||
{
|
||||
m_data = new btGpuBatchingPgsSolverInternalData;
|
||||
m_data = new b3GpuBatchingPgsSolverInternalData;
|
||||
m_data->m_context = ctx;
|
||||
m_data->m_device = device;
|
||||
m_data->m_queue = q;
|
||||
m_data->m_pairCapacity = pairCapacity;
|
||||
m_data->m_nIterations = 4;
|
||||
|
||||
m_data->m_bodyBufferGPU = new btOpenCLArray<b3RigidBodyCL>(ctx,q);
|
||||
m_data->m_inertiaBufferGPU = new btOpenCLArray<btInertiaCL>(ctx,q);
|
||||
m_data->m_pBufContactOutGPU = new btOpenCLArray<b3Contact4>(ctx,q);
|
||||
m_data->m_bodyBufferGPU = new b3OpenCLArray<b3RigidBodyCL>(ctx,q);
|
||||
m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaCL>(ctx,q);
|
||||
m_data->m_pBufContactOutGPU = new b3OpenCLArray<b3Contact4>(ctx,q);
|
||||
|
||||
m_data->m_solverGPU = new b3Solver(ctx,device,q,512*1024);
|
||||
|
||||
m_data->m_sort32 = new btRadixSort32CL(ctx,device,m_data->m_queue);
|
||||
m_data->m_scan = new btPrefixScanCL(ctx,device,m_data->m_queue,BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);
|
||||
m_data->m_search = new btBoundSearchCL(ctx,device,m_data->m_queue,BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);
|
||||
m_data->m_sort32 = new b3RadixSort32CL(ctx,device,m_data->m_queue);
|
||||
m_data->m_scan = new b3PrefixScanCL(ctx,device,m_data->m_queue,B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT);
|
||||
m_data->m_search = new b3BoundSearchCL(ctx,device,m_data->m_queue,B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT);
|
||||
|
||||
const int sortSize = BTNEXTMULTIPLEOF( pairCapacity, 512 );
|
||||
const int sortSize = B3NEXTMULTIPLEOF( pairCapacity, 512 );
|
||||
|
||||
m_data->m_sortDataBuffer = new btOpenCLArray<btSortData>(ctx,m_data->m_queue,sortSize);
|
||||
m_data->m_contactBuffer = new btOpenCLArray<b3Contact4>(ctx,m_data->m_queue);
|
||||
m_data->m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx,m_data->m_queue,sortSize);
|
||||
m_data->m_contactBuffer = new b3OpenCLArray<b3Contact4>(ctx,m_data->m_queue);
|
||||
|
||||
m_data->m_numConstraints = new btOpenCLArray<unsigned int>(ctx,m_data->m_queue,BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT );
|
||||
m_data->m_numConstraints->resize(BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);
|
||||
m_data->m_numConstraints = new b3OpenCLArray<unsigned int>(ctx,m_data->m_queue,B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT );
|
||||
m_data->m_numConstraints->resize(B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT);
|
||||
|
||||
m_data->m_contactCGPU = new btOpenCLArray<b3GpuConstraint4>(ctx,q,pairCapacity);
|
||||
m_data->m_contactCGPU = new b3OpenCLArray<b3GpuConstraint4>(ctx,q,pairCapacity);
|
||||
|
||||
m_data->m_offsets = new btOpenCLArray<unsigned int>( ctx,m_data->m_queue, BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT );
|
||||
m_data->m_offsets->resize(BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);
|
||||
m_data->m_offsets = new b3OpenCLArray<unsigned int>( ctx,m_data->m_queue, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT );
|
||||
m_data->m_offsets->resize(B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT);
|
||||
const char* additionalMacros = "";
|
||||
const char* srcFileNameForCaching="";
|
||||
|
||||
@@ -132,54 +132,54 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
|
||||
{
|
||||
|
||||
cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, SOLVER_CONTACT_KERNEL_PATH);
|
||||
btAssert(solveContactProg);
|
||||
b3Assert(solveContactProg);
|
||||
|
||||
cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, SOLVER_FRICTION_KERNEL_PATH);
|
||||
btAssert(solveFrictionProg);
|
||||
b3Assert(solveFrictionProg);
|
||||
|
||||
cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, SOLVER_SETUP2_KERNEL_PATH);
|
||||
btAssert(solverSetup2Prog);
|
||||
b3Assert(solverSetup2Prog);
|
||||
|
||||
|
||||
cl_program solverSetupProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, SOLVER_SETUP_KERNEL_PATH);
|
||||
btAssert(solverSetupProg);
|
||||
b3Assert(solverSetupProg);
|
||||
|
||||
|
||||
m_data->m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
|
||||
btAssert(m_data->m_solveFrictionKernel);
|
||||
b3Assert(m_data->m_solveFrictionKernel);
|
||||
|
||||
m_data->m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
|
||||
btAssert(m_data->m_solveContactKernel);
|
||||
b3Assert(m_data->m_solveContactKernel);
|
||||
|
||||
m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
|
||||
btAssert(m_data->m_contactToConstraintKernel);
|
||||
b3Assert(m_data->m_contactToConstraintKernel);
|
||||
|
||||
m_data->m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
|
||||
btAssert(m_data->m_setSortDataKernel);
|
||||
b3Assert(m_data->m_setSortDataKernel);
|
||||
|
||||
m_data->m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
|
||||
btAssert(m_data->m_reorderContactKernel);
|
||||
b3Assert(m_data->m_reorderContactKernel);
|
||||
|
||||
|
||||
m_data->m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
|
||||
btAssert(m_data->m_copyConstraintKernel);
|
||||
b3Assert(m_data->m_copyConstraintKernel);
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH);
|
||||
btAssert(batchingProg);
|
||||
b3Assert(batchingProg);
|
||||
|
||||
m_data->m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
|
||||
btAssert(m_data->m_batchingKernel);
|
||||
b3Assert(m_data->m_batchingKernel);
|
||||
}
|
||||
|
||||
{
|
||||
cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
|
||||
btAssert(batchingNewProg);
|
||||
b3Assert(batchingNewProg);
|
||||
|
||||
m_data->m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
|
||||
btAssert(m_data->m_batchingKernelNew);
|
||||
b3Assert(m_data->m_batchingKernelNew);
|
||||
}
|
||||
|
||||
|
||||
@@ -216,9 +216,9 @@ b3GpuBatchingPgsSolver::~b3GpuBatchingPgsSolver()
|
||||
|
||||
|
||||
|
||||
struct btConstraintCfg
|
||||
struct b3ConstraintCfg
|
||||
{
|
||||
btConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(0) {}
|
||||
b3ConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(0) {}
|
||||
|
||||
float m_positionDrift;
|
||||
float m_positionConstraintCoeff;
|
||||
@@ -232,34 +232,34 @@ struct btConstraintCfg
|
||||
|
||||
|
||||
|
||||
void b3GpuBatchingPgsSolver::solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||
btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations)
|
||||
void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations)
|
||||
{
|
||||
|
||||
|
||||
btInt4 cdata = btMakeInt4( n, 0, 0, 0 );
|
||||
b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 );
|
||||
{
|
||||
|
||||
const int nn = BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT;
|
||||
const int nn = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
|
||||
|
||||
cdata.x = 0;
|
||||
cdata.y = maxNumBatches;//250;
|
||||
|
||||
|
||||
int numWorkItems = 64*nn/BT_SOLVER_N_BATCHES;
|
||||
int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES;
|
||||
#ifdef DEBUG_ME
|
||||
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
||||
adl::btOpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
{
|
||||
|
||||
BT_PROFILE("m_batchSolveKernel iterations");
|
||||
B3_PROFILE("m_batchSolveKernel iterations");
|
||||
for(int iter=0; iter<numIterations; iter++)
|
||||
{
|
||||
for(int ib=0; ib<BT_SOLVER_N_BATCHES; ib++)
|
||||
for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++)
|
||||
{
|
||||
#ifdef DEBUG_ME
|
||||
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
||||
@@ -268,26 +268,26 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const btOpenCLArray<b3Rigi
|
||||
|
||||
|
||||
cdata.z = ib;
|
||||
cdata.w = BT_SOLVER_N_SPLIT;
|
||||
cdata.w = B3_SOLVER_N_SPLIT;
|
||||
|
||||
btLauncherCL launcher( m_data->m_queue, m_data->m_solveContactKernel );
|
||||
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveContactKernel );
|
||||
#if 1
|
||||
|
||||
btBufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
|
||||
btBufferInfoCL( bodyBuf->getBufferCL() ),
|
||||
btBufferInfoCL( shapeBuf->getBufferCL() ),
|
||||
btBufferInfoCL( constraint->getBufferCL() ),
|
||||
btBufferInfoCL( m_data->m_numConstraints->getBufferCL() ),
|
||||
btBufferInfoCL( m_data->m_offsets->getBufferCL() )
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL() ),
|
||||
b3BufferInfoCL( shapeBuf->getBufferCL() ),
|
||||
b3BufferInfoCL( constraint->getBufferCL() ),
|
||||
b3BufferInfoCL( m_data->m_numConstraints->getBufferCL() ),
|
||||
b3BufferInfoCL( m_data->m_offsets->getBufferCL() )
|
||||
#ifdef DEBUG_ME
|
||||
, btBufferInfoCL(&gpuDebugInfo)
|
||||
, b3BufferInfoCL(&gpuDebugInfo)
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
//launcher.setConst( cdata.x );
|
||||
launcher.setConst( cdata.y );
|
||||
launcher.setConst( cdata.z );
|
||||
@@ -352,32 +352,32 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const btOpenCLArray<b3Rigi
|
||||
bool applyFriction=true;
|
||||
if (applyFriction)
|
||||
{
|
||||
BT_PROFILE("m_batchSolveKernel iterations2");
|
||||
B3_PROFILE("m_batchSolveKernel iterations2");
|
||||
for(int iter=0; iter<numIterations; iter++)
|
||||
{
|
||||
for(int ib=0; ib<BT_SOLVER_N_BATCHES; ib++)
|
||||
for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++)
|
||||
{
|
||||
cdata.z = ib;
|
||||
cdata.w = BT_SOLVER_N_SPLIT;
|
||||
cdata.w = B3_SOLVER_N_SPLIT;
|
||||
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( bodyBuf->getBufferCL() ),
|
||||
btBufferInfoCL( shapeBuf->getBufferCL() ),
|
||||
btBufferInfoCL( constraint->getBufferCL() ),
|
||||
btBufferInfoCL( m_data->m_numConstraints->getBufferCL() ),
|
||||
btBufferInfoCL( m_data->m_offsets->getBufferCL() )
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL() ),
|
||||
b3BufferInfoCL( shapeBuf->getBufferCL() ),
|
||||
b3BufferInfoCL( constraint->getBufferCL() ),
|
||||
b3BufferInfoCL( m_data->m_numConstraints->getBufferCL() ),
|
||||
b3BufferInfoCL( m_data->m_offsets->getBufferCL() )
|
||||
#ifdef DEBUG_ME
|
||||
,btBufferInfoCL(&gpuDebugInfo)
|
||||
,b3BufferInfoCL(&gpuDebugInfo)
|
||||
#endif //DEBUG_ME
|
||||
};
|
||||
btLauncherCL launcher( m_data->m_queue, m_data->m_solveFrictionKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveFrictionKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
//launcher.setConst( cdata.x );
|
||||
launcher.setConst( cdata.y );
|
||||
launcher.setConst( cdata.z );
|
||||
launcher.setConst( cdata.w );
|
||||
|
||||
launcher.launch1D( 64*nn/BT_SOLVER_N_BATCHES, 64 );
|
||||
launcher.launch1D( 64*nn/B3_SOLVER_N_BATCHES, 64 );
|
||||
}
|
||||
}
|
||||
clFinish(m_data->m_queue);
|
||||
@@ -417,17 +417,17 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
if (useSolver)
|
||||
{
|
||||
float dt=1./60.;
|
||||
btConstraintCfg csCfg( dt );
|
||||
b3ConstraintCfg csCfg( dt );
|
||||
csCfg.m_enableParallelSolve = true;
|
||||
csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent;
|
||||
csCfg.m_staticIdx = 0;//m_static0Index;//m_planeBodyIndex;
|
||||
|
||||
|
||||
btOpenCLArray<b3RigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
|
||||
b3OpenCLArray<b3RigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
|
||||
|
||||
void* additionalData = 0;//m_data->m_frictionCGPU;
|
||||
const btOpenCLArray<btInertiaCL>* shapeBuf = m_data->m_inertiaBufferGPU;
|
||||
btOpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU;
|
||||
const b3OpenCLArray<b3InertiaCL>* shapeBuf = m_data->m_inertiaBufferGPU;
|
||||
b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU;
|
||||
int nContacts = nContactOut;
|
||||
|
||||
|
||||
@@ -442,7 +442,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
if( m_data->m_solverGPU->m_contactBuffer2 == 0 )
|
||||
{
|
||||
m_data->m_solverGPU->m_contactBuffer2 = new btOpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts );
|
||||
m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts );
|
||||
m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
|
||||
}
|
||||
|
||||
@@ -451,31 +451,31 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("batching");
|
||||
B3_PROFILE("batching");
|
||||
//@todo: just reserve it, without copy of original contact (unless we use warmstarting)
|
||||
|
||||
|
||||
|
||||
const btOpenCLArray<b3RigidBodyCL>* bodyNative = bodyBuf;
|
||||
const b3OpenCLArray<b3RigidBodyCL>* bodyNative = bodyBuf;
|
||||
|
||||
|
||||
{
|
||||
|
||||
//btOpenCLArray<b3RigidBodyCL>* bodyNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
|
||||
//btOpenCLArray<b3Contact4>* contactNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );
|
||||
//b3OpenCLArray<b3RigidBodyCL>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
|
||||
//b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );
|
||||
|
||||
const int sortAlignment = 512; // todo. get this out of sort
|
||||
if( csCfg.m_enableParallelSolve )
|
||||
{
|
||||
|
||||
|
||||
int sortSize = BTNEXTMULTIPLEOF( nContacts, sortAlignment );
|
||||
int sortSize = B3NEXTMULTIPLEOF( nContacts, sortAlignment );
|
||||
|
||||
btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||
btOpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
||||
b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||
b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
||||
|
||||
{ // 2. set cell idx
|
||||
BT_PROFILE("GPU set cell idx");
|
||||
B3_PROFILE("GPU set cell idx");
|
||||
struct CB
|
||||
{
|
||||
int m_nContacts;
|
||||
@@ -484,19 +484,19 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
int m_nSplit;
|
||||
};
|
||||
|
||||
btAssert( sortSize%64 == 0 );
|
||||
b3Assert( sortSize%64 == 0 );
|
||||
CB cdata;
|
||||
cdata.m_nContacts = nContacts;
|
||||
cdata.m_staticIdx = csCfg.m_staticIdx;
|
||||
cdata.m_scale = 1.f/(BT_SOLVER_N_OBJ_PER_SPLIT*csCfg.m_averageExtent);
|
||||
cdata.m_nSplit = BT_SOLVER_N_SPLIT;
|
||||
cdata.m_scale = 1.f/(B3_SOLVER_N_OBJ_PER_SPLIT*csCfg.m_averageExtent);
|
||||
cdata.m_nSplit = B3_SOLVER_N_SPLIT;
|
||||
|
||||
m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
|
||||
|
||||
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), btBufferInfoCL( bodyBuf->getBufferCL()), btBufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||
btLauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata.m_nContacts );
|
||||
launcher.setConst( cdata.m_scale );
|
||||
launcher.setConst(cdata.m_nSplit);
|
||||
@@ -509,17 +509,17 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
bool gpuRadixSort=true;
|
||||
if (gpuRadixSort)
|
||||
{ // 3. sort by cell idx
|
||||
BT_PROFILE("gpuRadixSort");
|
||||
int n = BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT;
|
||||
B3_PROFILE("gpuRadixSort");
|
||||
int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
|
||||
int sortBit = 32;
|
||||
//if( n <= 0xffff ) sortBit = 16;
|
||||
//if( n <= 0xff ) sortBit = 8;
|
||||
//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
|
||||
//adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize );
|
||||
btOpenCLArray<btSortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
|
||||
b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
|
||||
this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut);
|
||||
|
||||
/*b3AlignedObjectArray<btSortData> hostValues;
|
||||
/*b3AlignedObjectArray<b3SortData> hostValues;
|
||||
keyValuesInOut.copyToHost(hostValues);
|
||||
printf("hostValues.size=%d\n",hostValues.size());
|
||||
*/
|
||||
@@ -528,17 +528,17 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
{
|
||||
// 4. find entries
|
||||
BT_PROFILE("gpuBoundSearch");
|
||||
B3_PROFILE("gpuBoundSearch");
|
||||
|
||||
m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,
|
||||
BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT,btBoundSearchCL::COUNT);
|
||||
B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT,b3BoundSearchCL::COUNT);
|
||||
|
||||
|
||||
//adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
|
||||
// BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );
|
||||
// B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );
|
||||
|
||||
//unsigned int sum;
|
||||
m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);//,&sum );
|
||||
m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT);//,&sum );
|
||||
//printf("sum = %d\n",sum);
|
||||
}
|
||||
|
||||
@@ -548,15 +548,15 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
if (nContacts)
|
||||
{ // 5. sort constraints by cellIdx
|
||||
{
|
||||
BT_PROFILE("gpu m_reorderContactKernel");
|
||||
B3_PROFILE("gpu m_reorderContactKernel");
|
||||
|
||||
btInt4 cdata;
|
||||
b3Int4 cdata;
|
||||
cdata.x = nContacts;
|
||||
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), btBufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
|
||||
, btBufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||
btLauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
|
||||
, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
}
|
||||
@@ -574,11 +574,11 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
if (nContacts)
|
||||
{
|
||||
BT_PROFILE("gpu m_copyConstraintKernel");
|
||||
btInt4 cdata; cdata.x = nContacts;
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), btBufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) };
|
||||
btLauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
B3_PROFILE("gpu m_copyConstraintKernel");
|
||||
b3Int4 cdata; cdata.x = nContacts;
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) };
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
clFinish(m_data->m_queue);
|
||||
@@ -590,24 +590,24 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
{
|
||||
if (gpuBatchContacts)
|
||||
{
|
||||
BT_PROFILE("gpu batchContacts");
|
||||
B3_PROFILE("gpu batchContacts");
|
||||
maxNumBatches = 50;//250;
|
||||
m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
|
||||
} else
|
||||
{
|
||||
BT_PROFILE("cpu batchContacts");
|
||||
B3_PROFILE("cpu batchContacts");
|
||||
b3AlignedObjectArray<b3Contact4> cpuContacts;
|
||||
btOpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
|
||||
b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
|
||||
contactsIn->copyToHost(cpuContacts);
|
||||
|
||||
btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||
btOpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
||||
b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||
b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
||||
|
||||
b3AlignedObjectArray<unsigned int> nNativeHost;
|
||||
b3AlignedObjectArray<unsigned int> offsetsNativeHost;
|
||||
|
||||
{
|
||||
BT_PROFILE("countsNative/offsetsNative copyToHost");
|
||||
B3_PROFILE("countsNative/offsetsNative copyToHost");
|
||||
countsNative->copyToHost(nNativeHost);
|
||||
offsetsNative->copyToHost(offsetsNativeHost);
|
||||
}
|
||||
@@ -616,8 +616,8 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
int numNonzeroGrid=0;
|
||||
|
||||
{
|
||||
BT_PROFILE("batch grid");
|
||||
for(int i=0; i<BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT; i++)
|
||||
B3_PROFILE("batch grid");
|
||||
for(int i=0; i<B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; i++)
|
||||
{
|
||||
int n = (nNativeHost)[i];
|
||||
int offset = (offsetsNativeHost)[i];
|
||||
@@ -633,7 +633,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
||||
|
||||
|
||||
maxNumBatches = btMax(numBatches,maxNumBatches);
|
||||
maxNumBatches = b3Max(numBatches,maxNumBatches);
|
||||
static int globalMaxBatch = 0;
|
||||
if (maxNumBatches>globalMaxBatch )
|
||||
{
|
||||
@@ -647,7 +647,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
}
|
||||
}
|
||||
{
|
||||
BT_PROFILE("m_contactBuffer->copyFromHost");
|
||||
B3_PROFILE("m_contactBuffer->copyFromHost");
|
||||
m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts);
|
||||
}
|
||||
|
||||
@@ -660,7 +660,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
if (nContacts)
|
||||
{
|
||||
//BT_PROFILE("gpu convertToConstraints");
|
||||
//B3_PROFILE("gpu convertToConstraints");
|
||||
m_data->m_solverGPU->convertToConstraints( bodyBuf,
|
||||
shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
|
||||
contactConstraintOut,
|
||||
@@ -682,7 +682,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
m_data->m_solverGPU->m_nIterations = 4;//10
|
||||
if (gpuSolveConstraint)
|
||||
{
|
||||
BT_PROFILE("GPU solveContactConstraint");
|
||||
B3_PROFILE("GPU solveContactConstraint");
|
||||
|
||||
m_data->m_solverGPU->solveContactConstraint(
|
||||
m_data->m_bodyBufferGPU,
|
||||
@@ -693,7 +693,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
}
|
||||
else
|
||||
{
|
||||
BT_PROFILE("Host solveContactConstraint");
|
||||
B3_PROFILE("Host solveContactConstraint");
|
||||
|
||||
m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches);
|
||||
}
|
||||
@@ -705,7 +705,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
#if 0
|
||||
if (0)
|
||||
{
|
||||
BT_PROFILE("read body velocities back to CPU");
|
||||
B3_PROFILE("read body velocities back to CPU");
|
||||
//read body updated linear/angular velocities back to CPU
|
||||
m_data->m_bodyBufferGPU->read(
|
||||
m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies);
|
||||
@@ -718,13 +718,13 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
}
|
||||
|
||||
|
||||
void b3GpuBatchingPgsSolver::batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx )
|
||||
void b3GpuBatchingPgsSolver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* n, b3OpenCLArray<unsigned int>* offsets, int staticIdx )
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
|
||||
static bool sortfnc(const btSortData& a,const btSortData& b)
|
||||
static bool sortfnc(const b3SortData& a,const b3SortData& b)
|
||||
{
|
||||
return (a.m_key<b.m_key);
|
||||
}
|
||||
@@ -737,14 +737,14 @@ static bool sortfnc(const btSortData& a,const btSortData& b)
|
||||
|
||||
|
||||
b3AlignedObjectArray<unsigned int> idxBuffer;
|
||||
b3AlignedObjectArray<btSortData> sortData;
|
||||
b3AlignedObjectArray<b3SortData> sortData;
|
||||
b3AlignedObjectArray<b3Contact4> old;
|
||||
|
||||
|
||||
inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies)
|
||||
{
|
||||
|
||||
BT_PROFILE("sortConstraintByBatch");
|
||||
B3_PROFILE("sortConstraintByBatch");
|
||||
int numIter = 0;
|
||||
|
||||
sortData.resize(n);
|
||||
@@ -769,7 +769,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n,
|
||||
int batchIdx = 0;
|
||||
|
||||
{
|
||||
BT_PROFILE("cpu batch innerloop");
|
||||
B3_PROFILE("cpu batch innerloop");
|
||||
while( nIdxSrc )
|
||||
{
|
||||
numIter++;
|
||||
@@ -782,7 +782,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n,
|
||||
for(int i=0; i<nIdxSrc; i++)
|
||||
{
|
||||
int idx = idxSrc[i];
|
||||
btAssert( idx < n );
|
||||
b3Assert( idx < n );
|
||||
// check if it can go
|
||||
int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
|
||||
int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
|
||||
@@ -830,19 +830,19 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n,
|
||||
idxDst[nIdxDst++] = idx;
|
||||
}
|
||||
}
|
||||
btSwap( idxSrc, idxDst );
|
||||
btSwap( nIdxSrc, nIdxDst );
|
||||
b3Swap( idxSrc, idxDst );
|
||||
b3Swap( nIdxSrc, nIdxDst );
|
||||
batchIdx ++;
|
||||
}
|
||||
}
|
||||
{
|
||||
BT_PROFILE("quickSort");
|
||||
B3_PROFILE("quickSort");
|
||||
sortData.quickSort(sortfnc);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("reorder");
|
||||
B3_PROFILE("reorder");
|
||||
// reorder
|
||||
|
||||
memcpy( &old[0], cs, sizeof(b3Contact4)*n);
|
||||
@@ -858,7 +858,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n,
|
||||
// debugPrintf( "nBatches: %d\n", batchIdx );
|
||||
for(int i=0; i<n; i++)
|
||||
{
|
||||
btAssert( cs[i].getBatchIdx() != -1 );
|
||||
b3Assert( cs[i].getBatchIdx() != -1 );
|
||||
}
|
||||
#endif
|
||||
return batchIdx;
|
||||
@@ -870,7 +870,7 @@ b3AlignedObjectArray<int> bodyUsed2;
|
||||
inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
|
||||
{
|
||||
|
||||
BT_PROFILE("sortConstraintByBatch2");
|
||||
B3_PROFILE("sortConstraintByBatch2");
|
||||
|
||||
|
||||
|
||||
@@ -903,7 +903,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("cpu batch innerloop");
|
||||
B3_PROFILE("cpu batch innerloop");
|
||||
|
||||
while( numValidConstraints < numConstraints)
|
||||
{
|
||||
@@ -917,7 +917,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
|
||||
for(int i=numValidConstraints; i<numConstraints; i++)
|
||||
{
|
||||
int idx = idxSrc[i];
|
||||
btAssert( idx < numConstraints );
|
||||
b3Assert( idx < numConstraints );
|
||||
// check if it can go
|
||||
int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
|
||||
int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
|
||||
@@ -968,7 +968,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
|
||||
|
||||
if (i!=numValidConstraints)
|
||||
{
|
||||
btSwap(idxSrc[i], idxSrc[numValidConstraints]);
|
||||
b3Swap(idxSrc[i], idxSrc[numValidConstraints]);
|
||||
}
|
||||
|
||||
numValidConstraints++;
|
||||
@@ -991,19 +991,19 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
|
||||
}
|
||||
}
|
||||
{
|
||||
BT_PROFILE("quickSort");
|
||||
B3_PROFILE("quickSort");
|
||||
//m_data->m_sortData.quickSort(sortfnc);
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("reorder");
|
||||
B3_PROFILE("reorder");
|
||||
// reorder
|
||||
|
||||
memcpy( &m_data->m_old[0], cs, sizeof(b3Contact4)*numConstraints);
|
||||
|
||||
for(int i=0; i<numConstraints; i++)
|
||||
{
|
||||
btAssert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]);
|
||||
b3Assert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]);
|
||||
int idx = m_data->m_sortData[idxSrc[i]].m_value;
|
||||
cs[i] = m_data->m_old[idx];
|
||||
}
|
||||
@@ -1013,7 +1013,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
|
||||
// debugPrintf( "nBatches: %d\n", batchIdx );
|
||||
for(int i=0; i<numConstraints; i++)
|
||||
{
|
||||
btAssert( cs[i].getBatchIdx() != -1 );
|
||||
b3Assert( cs[i].getBatchIdx() != -1 );
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1029,7 +1029,7 @@ b3AlignedObjectArray<int> curUsed;
|
||||
inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
|
||||
{
|
||||
|
||||
BT_PROFILE("sortConstraintByBatch3");
|
||||
B3_PROFILE("sortConstraintByBatch3");
|
||||
|
||||
static int maxSwaps = 0;
|
||||
int numSwaps = 0;
|
||||
@@ -1071,7 +1071,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int n
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("cpu batch innerloop");
|
||||
B3_PROFILE("cpu batch innerloop");
|
||||
|
||||
while( numValidConstraints < numConstraints)
|
||||
{
|
||||
@@ -1086,7 +1086,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int n
|
||||
for(int i=numValidConstraints; i<numConstraints; i++)
|
||||
{
|
||||
int idx = i;
|
||||
btAssert( idx < numConstraints );
|
||||
b3Assert( idx < numConstraints );
|
||||
// check if it can go
|
||||
int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
|
||||
int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
|
||||
@@ -1123,7 +1123,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int n
|
||||
|
||||
if (i!=numValidConstraints)
|
||||
{
|
||||
btSwap(cs[i],cs[numValidConstraints]);
|
||||
b3Swap(cs[i],cs[numValidConstraints]);
|
||||
numSwaps++;
|
||||
}
|
||||
|
||||
@@ -1148,7 +1148,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int n
|
||||
// debugPrintf( "nBatches: %d\n", batchIdx );
|
||||
for(int i=0; i<numConstraints; i++)
|
||||
{
|
||||
btAssert( cs[i].getBatchIdx() != -1 );
|
||||
b3Assert( cs[i].getBatchIdx() != -1 );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
|
||||
#ifndef BT_GPU_BATCHING_PGS_SOLVER_H
|
||||
#define BT_GPU_BATCHING_PGS_SOLVER_H
|
||||
#ifndef B3_GPU_BATCHING_PGS_SOLVER_H
|
||||
#define B3_GPU_BATCHING_PGS_SOLVER_H
|
||||
|
||||
#include "../../basic_initialize/b3OpenCLInclude.h"
|
||||
#include "../../parallel_primitives/host/btOpenCLArray.h"
|
||||
#include "../../parallel_primitives/host/b3OpenCLArray.h"
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
|
||||
#include "b3GpuConstraint4.h"
|
||||
@@ -14,9 +14,9 @@ protected:
|
||||
|
||||
|
||||
|
||||
struct btGpuBatchingPgsSolverInternalData* m_data;
|
||||
struct b3GpuBatchingPgsSolverInternalData* m_data;
|
||||
|
||||
void batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
|
||||
void batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* n, b3OpenCLArray<unsigned int>* offsets, int staticIdx );
|
||||
|
||||
inline int sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||
inline int sortConstraintByBatch2( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||
@@ -24,8 +24,8 @@ protected:
|
||||
|
||||
|
||||
|
||||
void solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||
btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
|
||||
void solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
|
||||
|
||||
public:
|
||||
|
||||
@@ -36,5 +36,5 @@ public:
|
||||
|
||||
};
|
||||
|
||||
#endif //BT_GPU_BATCHING_PGS_SOLVER_H
|
||||
#endif //B3_GPU_BATCHING_PGS_SOLVER_H
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
|
||||
#ifndef BT_CONSTRAINT4_h
|
||||
#define BT_CONSTRAINT4_h
|
||||
#ifndef B3_CONSTRAINT4_h
|
||||
#define B3_CONSTRAINT4_h
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
|
||||
ATTRIBUTE_ALIGNED16(struct) b3GpuConstraint4
|
||||
{
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
b3Vector3 m_linear;//normal?
|
||||
b3Vector3 m_worldPos[4];
|
||||
@@ -25,5 +25,5 @@ ATTRIBUTE_ALIGNED16(struct) b3GpuConstraint4
|
||||
inline float getFrictionCoeff() const { return m_linear[3]; }
|
||||
};
|
||||
|
||||
#endif //BT_CONSTRAINT4_h
|
||||
#endif //B3_CONSTRAINT4_h
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include "b3GpuNarrowPhase.h"
|
||||
|
||||
|
||||
#include "parallel_primitives/host/btOpenCLArray.h"
|
||||
#include "parallel_primitives/host/b3OpenCLArray.h"
|
||||
#include "../../gpu_narrowphase/host/b3ConvexPolyhedronCL.h"
|
||||
#include "../../gpu_narrowphase/host/b3ConvexHullContact.h"
|
||||
#include "../../gpu_broadphase/host/b3SapAabb.h"
|
||||
@@ -12,7 +12,7 @@
|
||||
#include "Bullet3Geometry/b3AabbUtil.h"
|
||||
#include "../../gpu_narrowphase/host/b3BvhInfo.h"
|
||||
|
||||
struct btGpuNarrowPhaseInternalData
|
||||
struct b3GpuNarrowPhaseInternalData
|
||||
{
|
||||
b3AlignedObjectArray<b3ConvexUtility*>* m_convexData;
|
||||
|
||||
@@ -21,59 +21,59 @@ struct btGpuNarrowPhaseInternalData
|
||||
b3AlignedObjectArray<b3Vector3> m_convexVertices;
|
||||
b3AlignedObjectArray<int> m_convexIndices;
|
||||
|
||||
btOpenCLArray<b3ConvexPolyhedronCL>* m_convexPolyhedraGPU;
|
||||
btOpenCLArray<b3Vector3>* m_uniqueEdgesGPU;
|
||||
btOpenCLArray<b3Vector3>* m_convexVerticesGPU;
|
||||
btOpenCLArray<int>* m_convexIndicesGPU;
|
||||
b3OpenCLArray<b3ConvexPolyhedronCL>* m_convexPolyhedraGPU;
|
||||
b3OpenCLArray<b3Vector3>* m_uniqueEdgesGPU;
|
||||
b3OpenCLArray<b3Vector3>* m_convexVerticesGPU;
|
||||
b3OpenCLArray<int>* m_convexIndicesGPU;
|
||||
|
||||
btOpenCLArray<b3Vector3>* m_worldVertsB1GPU;
|
||||
btOpenCLArray<btInt4>* m_clippingFacesOutGPU;
|
||||
btOpenCLArray<b3Vector3>* m_worldNormalsAGPU;
|
||||
btOpenCLArray<b3Vector3>* m_worldVertsA1GPU;
|
||||
btOpenCLArray<b3Vector3>* m_worldVertsB2GPU;
|
||||
b3OpenCLArray<b3Vector3>* m_worldVertsB1GPU;
|
||||
b3OpenCLArray<b3Int4>* m_clippingFacesOutGPU;
|
||||
b3OpenCLArray<b3Vector3>* m_worldNormalsAGPU;
|
||||
b3OpenCLArray<b3Vector3>* m_worldVertsA1GPU;
|
||||
b3OpenCLArray<b3Vector3>* m_worldVertsB2GPU;
|
||||
|
||||
b3AlignedObjectArray<btGpuChildShape> m_cpuChildShapes;
|
||||
btOpenCLArray<btGpuChildShape>* m_gpuChildShapes;
|
||||
b3AlignedObjectArray<b3GpuChildShape> m_cpuChildShapes;
|
||||
b3OpenCLArray<b3GpuChildShape>* m_gpuChildShapes;
|
||||
|
||||
b3AlignedObjectArray<btGpuFace> m_convexFaces;
|
||||
btOpenCLArray<btGpuFace>* m_convexFacesGPU;
|
||||
b3AlignedObjectArray<b3GpuFace> m_convexFaces;
|
||||
b3OpenCLArray<b3GpuFace>* m_convexFacesGPU;
|
||||
|
||||
GpuSatCollision* m_gpuSatCollision;
|
||||
|
||||
b3AlignedObjectArray<btInt2>* m_pBufPairsCPU;
|
||||
b3AlignedObjectArray<b3Int2>* m_pBufPairsCPU;
|
||||
|
||||
btOpenCLArray<btInt2>* m_convexPairsOutGPU;
|
||||
btOpenCLArray<btInt2>* m_planePairs;
|
||||
b3OpenCLArray<b3Int2>* m_convexPairsOutGPU;
|
||||
b3OpenCLArray<b3Int2>* m_planePairs;
|
||||
|
||||
btOpenCLArray<b3Contact4>* m_pBufContactOutGPU;
|
||||
b3OpenCLArray<b3Contact4>* m_pBufContactOutGPU;
|
||||
b3AlignedObjectArray<b3Contact4>* m_pBufContactOutCPU;
|
||||
|
||||
|
||||
b3AlignedObjectArray<b3RigidBodyCL>* m_bodyBufferCPU;
|
||||
btOpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
|
||||
b3OpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
|
||||
|
||||
b3AlignedObjectArray<btInertiaCL>* m_inertiaBufferCPU;
|
||||
btOpenCLArray<btInertiaCL>* m_inertiaBufferGPU;
|
||||
b3AlignedObjectArray<b3InertiaCL>* m_inertiaBufferCPU;
|
||||
b3OpenCLArray<b3InertiaCL>* m_inertiaBufferGPU;
|
||||
|
||||
int m_numAcceleratedShapes;
|
||||
int m_numAcceleratedRigidBodies;
|
||||
|
||||
b3AlignedObjectArray<b3Collidable> m_collidablesCPU;
|
||||
btOpenCLArray<b3Collidable>* m_collidablesGPU;
|
||||
b3OpenCLArray<b3Collidable>* m_collidablesGPU;
|
||||
|
||||
btOpenCLArray<b3SapAabb>* m_localShapeAABBGPU;
|
||||
b3OpenCLArray<b3SapAabb>* m_localShapeAABBGPU;
|
||||
b3AlignedObjectArray<b3SapAabb>* m_localShapeAABBCPU;
|
||||
|
||||
b3AlignedObjectArray<class b3OptimizedBvh*> m_bvhData;
|
||||
|
||||
b3AlignedObjectArray<btQuantizedBvhNode> m_treeNodesCPU;
|
||||
b3AlignedObjectArray<btBvhSubtreeInfo> m_subTreesCPU;
|
||||
b3AlignedObjectArray<b3QuantizedBvhNode> m_treeNodesCPU;
|
||||
b3AlignedObjectArray<b3BvhSubtreeInfo> m_subTreesCPU;
|
||||
|
||||
b3AlignedObjectArray<b3BvhInfo> m_bvhInfoCPU;
|
||||
btOpenCLArray<b3BvhInfo>* m_bvhInfoGPU;
|
||||
b3OpenCLArray<b3BvhInfo>* m_bvhInfoGPU;
|
||||
|
||||
btOpenCLArray<btQuantizedBvhNode>* m_treeNodesGPU;
|
||||
btOpenCLArray<btBvhSubtreeInfo>* m_subTreesGPU;
|
||||
b3OpenCLArray<b3QuantizedBvhNode>* m_treeNodesGPU;
|
||||
b3OpenCLArray<b3BvhSubtreeInfo>* m_subTreesGPU;
|
||||
|
||||
|
||||
b3Config m_config;
|
||||
@@ -91,52 +91,52 @@ m_device(device),
|
||||
m_queue(queue)
|
||||
{
|
||||
|
||||
m_data = new btGpuNarrowPhaseInternalData();
|
||||
memset(m_data,0,sizeof(btGpuNarrowPhaseInternalData));
|
||||
m_data = new b3GpuNarrowPhaseInternalData();
|
||||
memset(m_data,0,sizeof(b3GpuNarrowPhaseInternalData));
|
||||
|
||||
m_data->m_config = config;
|
||||
|
||||
m_data->m_gpuSatCollision = new GpuSatCollision(ctx,device,queue);
|
||||
m_data->m_pBufPairsCPU = new b3AlignedObjectArray<btInt2>;
|
||||
m_data->m_pBufPairsCPU = new b3AlignedObjectArray<b3Int2>;
|
||||
m_data->m_pBufPairsCPU->resize(config.m_maxBroadphasePairs);
|
||||
|
||||
m_data->m_convexPairsOutGPU = new btOpenCLArray<btInt2>(ctx,queue,config.m_maxBroadphasePairs,false);
|
||||
m_data->m_planePairs = new btOpenCLArray<btInt2>(ctx,queue,config.m_maxBroadphasePairs,false);
|
||||
m_data->m_convexPairsOutGPU = new b3OpenCLArray<b3Int2>(ctx,queue,config.m_maxBroadphasePairs,false);
|
||||
m_data->m_planePairs = new b3OpenCLArray<b3Int2>(ctx,queue,config.m_maxBroadphasePairs,false);
|
||||
|
||||
m_data->m_pBufContactOutCPU = new b3AlignedObjectArray<b3Contact4>();
|
||||
m_data->m_pBufContactOutCPU->resize(config.m_maxBroadphasePairs);
|
||||
m_data->m_bodyBufferCPU = new b3AlignedObjectArray<b3RigidBodyCL>();
|
||||
m_data->m_bodyBufferCPU->resize(config.m_maxConvexBodies);
|
||||
|
||||
m_data->m_inertiaBufferCPU = new b3AlignedObjectArray<btInertiaCL>();
|
||||
m_data->m_inertiaBufferCPU = new b3AlignedObjectArray<b3InertiaCL>();
|
||||
m_data->m_inertiaBufferCPU->resize(config.m_maxConvexBodies);
|
||||
|
||||
m_data->m_pBufContactOutGPU = new btOpenCLArray<b3Contact4>(ctx,queue, config.m_maxContactCapacity,true);
|
||||
m_data->m_pBufContactOutGPU = new b3OpenCLArray<b3Contact4>(ctx,queue, config.m_maxContactCapacity,true);
|
||||
|
||||
m_data->m_inertiaBufferGPU = new btOpenCLArray<btInertiaCL>(ctx,queue,config.m_maxConvexBodies,false);
|
||||
m_data->m_collidablesGPU = new btOpenCLArray<b3Collidable>(ctx,queue,config.m_maxConvexShapes);
|
||||
m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaCL>(ctx,queue,config.m_maxConvexBodies,false);
|
||||
m_data->m_collidablesGPU = new b3OpenCLArray<b3Collidable>(ctx,queue,config.m_maxConvexShapes);
|
||||
|
||||
m_data->m_localShapeAABBCPU = new b3AlignedObjectArray<b3SapAabb>;
|
||||
m_data->m_localShapeAABBGPU = new btOpenCLArray<b3SapAabb>(ctx,queue,config.m_maxConvexShapes);
|
||||
m_data->m_localShapeAABBGPU = new b3OpenCLArray<b3SapAabb>(ctx,queue,config.m_maxConvexShapes);
|
||||
|
||||
|
||||
//m_data->m_solverDataGPU = adl::Solver<adl::TYPE_CL>::allocate(ctx,queue, config.m_maxBroadphasePairs,false);
|
||||
m_data->m_bodyBufferGPU = new btOpenCLArray<b3RigidBodyCL>(ctx,queue, config.m_maxConvexBodies,false);
|
||||
m_data->m_bodyBufferGPU = new b3OpenCLArray<b3RigidBodyCL>(ctx,queue, config.m_maxConvexBodies,false);
|
||||
|
||||
m_data->m_convexFacesGPU = new btOpenCLArray<btGpuFace>(ctx,queue,config.m_maxConvexShapes*config.m_maxFacesPerShape,false);
|
||||
m_data->m_gpuChildShapes = new btOpenCLArray<btGpuChildShape>(ctx,queue,config.m_maxCompoundChildShapes,false);
|
||||
m_data->m_convexFacesGPU = new b3OpenCLArray<b3GpuFace>(ctx,queue,config.m_maxConvexShapes*config.m_maxFacesPerShape,false);
|
||||
m_data->m_gpuChildShapes = new b3OpenCLArray<b3GpuChildShape>(ctx,queue,config.m_maxCompoundChildShapes,false);
|
||||
|
||||
m_data->m_convexPolyhedraGPU = new btOpenCLArray<b3ConvexPolyhedronCL>(ctx,queue,config.m_maxConvexShapes,false);
|
||||
m_data->m_uniqueEdgesGPU = new btOpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexUniqueEdges,true);
|
||||
m_data->m_convexVerticesGPU = new btOpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexVertices,true);
|
||||
m_data->m_convexIndicesGPU = new btOpenCLArray<int>(ctx,queue,config.m_maxConvexIndices,true);
|
||||
m_data->m_convexPolyhedraGPU = new b3OpenCLArray<b3ConvexPolyhedronCL>(ctx,queue,config.m_maxConvexShapes,false);
|
||||
m_data->m_uniqueEdgesGPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexUniqueEdges,true);
|
||||
m_data->m_convexVerticesGPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexVertices,true);
|
||||
m_data->m_convexIndicesGPU = new b3OpenCLArray<int>(ctx,queue,config.m_maxConvexIndices,true);
|
||||
|
||||
|
||||
m_data->m_worldVertsB1GPU = new btOpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace);
|
||||
m_data->m_clippingFacesOutGPU = new btOpenCLArray<btInt4>(ctx,queue,config.m_maxConvexBodies);
|
||||
m_data->m_worldNormalsAGPU = new btOpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies);
|
||||
m_data->m_worldVertsA1GPU = new btOpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace);
|
||||
m_data->m_worldVertsB2GPU = new btOpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace);
|
||||
m_data->m_worldVertsB1GPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace);
|
||||
m_data->m_clippingFacesOutGPU = new b3OpenCLArray<b3Int4>(ctx,queue,config.m_maxConvexBodies);
|
||||
m_data->m_worldNormalsAGPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies);
|
||||
m_data->m_worldVertsA1GPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace);
|
||||
m_data->m_worldVertsB2GPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace);
|
||||
|
||||
|
||||
|
||||
@@ -150,12 +150,12 @@ m_queue(queue)
|
||||
m_data->m_numAcceleratedRigidBodies = 0;
|
||||
|
||||
|
||||
m_data->m_subTreesGPU = new btOpenCLArray<btBvhSubtreeInfo>(this->m_context,this->m_queue);
|
||||
m_data->m_treeNodesGPU = new btOpenCLArray<btQuantizedBvhNode>(this->m_context,this->m_queue);
|
||||
m_data->m_bvhInfoGPU = new btOpenCLArray<b3BvhInfo>(this->m_context,this->m_queue);
|
||||
m_data->m_subTreesGPU = new b3OpenCLArray<b3BvhSubtreeInfo>(this->m_context,this->m_queue);
|
||||
m_data->m_treeNodesGPU = new b3OpenCLArray<b3QuantizedBvhNode>(this->m_context,this->m_queue);
|
||||
m_data->m_bvhInfoGPU = new b3OpenCLArray<b3BvhInfo>(this->m_context,this->m_queue);
|
||||
|
||||
//m_data->m_contactCGPU = new btOpenCLArray<Constraint4>(ctx,queue,config.m_maxBroadphasePairs,false);
|
||||
//m_data->m_frictionCGPU = new btOpenCLArray<adl::Solver<adl::TYPE_CL>::allocateFrictionConstraint( m_data->m_deviceCL, config.m_maxBroadphasePairs);
|
||||
//m_data->m_contactCGPU = new b3OpenCLArray<Constraint4>(ctx,queue,config.m_maxBroadphasePairs,false);
|
||||
//m_data->m_frictionCGPU = new b3OpenCLArray<adl::Solver<adl::TYPE_CL>::allocateFrictionConstraint( m_data->m_deviceCL, config.m_maxBroadphasePairs);
|
||||
|
||||
}
|
||||
|
||||
@@ -246,7 +246,7 @@ int b3GpuNarrowPhase::registerSphereShape(float radius)
|
||||
int b3GpuNarrowPhase::registerFace(const b3Vector3& faceNormal, float faceConstant)
|
||||
{
|
||||
int faceOffset = m_data->m_convexFaces.size();
|
||||
btGpuFace& face = m_data->m_convexFaces.expand();
|
||||
b3GpuFace& face = m_data->m_convexFaces.expand();
|
||||
face.m_plane[0] = faceNormal.getX();
|
||||
face.m_plane[1] = faceNormal.getY();
|
||||
face.m_plane[2] = faceNormal.getZ();
|
||||
@@ -426,7 +426,7 @@ int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr)
|
||||
|
||||
}
|
||||
|
||||
int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<btGpuChildShape>* childShapes)
|
||||
int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes)
|
||||
{
|
||||
|
||||
int collidableIndex = allocateCollidable();
|
||||
@@ -435,7 +435,7 @@ int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<btGpuChildShap
|
||||
|
||||
col.m_shapeIndex = m_data->m_cpuChildShapes.size();
|
||||
{
|
||||
btAssert(col.m_shapeIndex+childShapes->size()<m_data->m_config.m_maxCompoundChildShapes);
|
||||
b3Assert(col.m_shapeIndex+childShapes->size()<m_data->m_config.m_maxCompoundChildShapes);
|
||||
for (int i=0;i<childShapes->size();i++)
|
||||
{
|
||||
m_data->m_cpuChildShapes.push_back(childShapes->at(i));
|
||||
@@ -474,7 +474,7 @@ int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<btGpuChildShap
|
||||
childShapes->at(i).m_childOrientation[1],
|
||||
childShapes->at(i).m_childOrientation[2],
|
||||
childShapes->at(i).m_childOrientation[3]));
|
||||
btTransformAabb(childLocalAabbMin,childLocalAabbMax,margin,childTr,aMin,aMax);
|
||||
b3TransformAabb(childLocalAabbMin,childLocalAabbMax,margin,childTr,aMin,aMax);
|
||||
myAabbMin.setMin(aMin);
|
||||
myAabbMax.setMax(aMax);
|
||||
}
|
||||
@@ -539,7 +539,7 @@ int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vert
|
||||
|
||||
bool useQuantizedAabbCompression = true;
|
||||
b3TriangleIndexVertexArray* meshInterface=new b3TriangleIndexVertexArray();
|
||||
btIndexedMesh mesh;
|
||||
b3IndexedMesh mesh;
|
||||
mesh.m_numTriangles = indices->size()/3;
|
||||
mesh.m_numVertices = vertices->size();
|
||||
mesh.m_vertexBase = (const unsigned char *)&vertices->at(0).getX();
|
||||
@@ -551,7 +551,7 @@ int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vert
|
||||
bvh->build(meshInterface, useQuantizedAabbCompression, (b3Vector3&)aabb.m_min, (b3Vector3&)aabb.m_max);
|
||||
m_data->m_bvhData.push_back(bvh);
|
||||
int numNodes = bvh->getQuantizedNodeArray().size();
|
||||
//btOpenCLArray<btQuantizedBvhNode>* treeNodesGPU = new btOpenCLArray<btQuantizedBvhNode>(this->m_context,this->m_queue,numNodes);
|
||||
//b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU = new b3OpenCLArray<b3QuantizedBvhNode>(this->m_context,this->m_queue,numNodes);
|
||||
//treeNodesGPU->copyFromHost(bvh->getQuantizedNodeArray());
|
||||
int numSubTrees = bvh->getSubtreeInfoArray().size();
|
||||
|
||||
@@ -582,7 +582,7 @@ int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vert
|
||||
m_data->m_treeNodesCPU.push_back(bvh->getQuantizedNodeArray()[i]);
|
||||
}
|
||||
|
||||
//btOpenCLArray<btBvhSubtreeInfo>* subTreesGPU = new btOpenCLArray<btBvhSubtreeInfo>(this->m_context,this->m_queue,numSubTrees);
|
||||
//b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU = new b3OpenCLArray<b3BvhSubtreeInfo>(this->m_context,this->m_queue,numSubTrees);
|
||||
//subTreesGPU->copyFromHost(bvh->getSubtreeInfoArray());
|
||||
|
||||
m_data->m_treeNodesGPU->copyFromHost(m_data->m_treeNodesCPU);
|
||||
@@ -741,12 +741,12 @@ void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase
|
||||
int nContactOut = 0;
|
||||
|
||||
int maxTriConvexPairCapacity = m_data->m_config.m_maxTriConvexPairCapacity;
|
||||
btOpenCLArray<btInt4> triangleConvexPairs(m_context,m_queue, maxTriConvexPairCapacity);
|
||||
b3OpenCLArray<b3Int4> triangleConvexPairs(m_context,m_queue, maxTriConvexPairCapacity);
|
||||
int numTriConvexPairsOut=0;
|
||||
|
||||
btOpenCLArray<btInt2> broadphasePairsGPU(m_context,m_queue);
|
||||
b3OpenCLArray<b3Int2> broadphasePairsGPU(m_context,m_queue);
|
||||
broadphasePairsGPU.setFromOpenCLBuffer(broadphasePairs,numBroadphasePairs);
|
||||
btOpenCLArray<btYetAnotherAabb> clAabbArray(this->m_context,this->m_queue);
|
||||
b3OpenCLArray<b3YetAnotherAabb> clAabbArray(this->m_context,this->m_queue);
|
||||
clAabbArray.setFromOpenCLBuffer(aabbsWS,numObjects);
|
||||
|
||||
m_data->m_gpuSatCollision->computeConvexConvexContactsGPUSAT(
|
||||
@@ -794,7 +794,7 @@ int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const f
|
||||
b3Vector3 aabbMin(aabbMinPtr[0],aabbMinPtr[1],aabbMinPtr[2]);
|
||||
b3Vector3 aabbMax (aabbMaxPtr[0],aabbMaxPtr[1],aabbMaxPtr[2]);
|
||||
|
||||
btAssert(m_data->m_numAcceleratedRigidBodies< (m_data->m_config.m_maxConvexBodies-1));
|
||||
b3Assert(m_data->m_numAcceleratedRigidBodies< (m_data->m_config.m_maxConvexBodies-1));
|
||||
|
||||
m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies+1);
|
||||
|
||||
@@ -828,7 +828,7 @@ int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const f
|
||||
m_data->m_bodyBufferGPU->copyFromHostPointer(&body,1,m_data->m_numAcceleratedRigidBodies);
|
||||
}
|
||||
|
||||
btInertiaCL& shapeInfo = m_data->m_inertiaBufferCPU->at(m_data->m_numAcceleratedRigidBodies);
|
||||
b3InertiaCL& shapeInfo = m_data->m_inertiaBufferCPU->at(m_data->m_numAcceleratedRigidBodies);
|
||||
|
||||
if (mass==0.f)
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BT_GPU_NARROWPHASE_H
|
||||
#define BT_GPU_NARROWPHASE_H
|
||||
#ifndef B3_GPU_NARROWPHASE_H
|
||||
#define B3_GPU_NARROWPHASE_H
|
||||
|
||||
#include "../../gpu_narrowphase/host/b3Collidable.h"
|
||||
#include "basic_initialize/b3OpenCLInclude.h"
|
||||
@@ -10,7 +10,7 @@ class b3GpuNarrowPhase
|
||||
{
|
||||
protected:
|
||||
|
||||
struct btGpuNarrowPhaseInternalData* m_data;
|
||||
struct b3GpuNarrowPhaseInternalData* m_data;
|
||||
int m_acceleratedCompanionShapeIndex;
|
||||
int m_planeBodyIndex;
|
||||
int m_static0Index;
|
||||
@@ -34,7 +34,7 @@ public:
|
||||
int registerSphereShape(float radius);
|
||||
int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant);
|
||||
|
||||
int registerCompoundShape(b3AlignedObjectArray<btGpuChildShape>* childShapes);
|
||||
int registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes);
|
||||
int registerFace(const b3Vector3& faceNormal, float faceConstant);
|
||||
|
||||
int registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices,const float* scaling);
|
||||
@@ -82,5 +82,5 @@ public:
|
||||
const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const;
|
||||
};
|
||||
|
||||
#endif //BT_GPU_NARROWPHASE_H
|
||||
#endif //B3_GPU_NARROWPHASE_H
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
#include "Bullet3Geometry/b3AabbUtil.h"
|
||||
#include "../../gpu_broadphase/host/b3SapAabb.h"
|
||||
#include "../../gpu_broadphase/host/b3GpuSapBroadphase.h"
|
||||
#include "parallel_primitives/host/btLauncherCL.h"
|
||||
#include "parallel_primitives/host/b3LauncherCL.h"
|
||||
#include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h"
|
||||
|
||||
#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h"
|
||||
@@ -20,7 +20,7 @@ bool useBullet2CpuSolver = true;//false;
|
||||
bool dumpContactStats = false;
|
||||
|
||||
#ifdef TEST_OTHER_GPU_SOLVER
|
||||
#include "btGpuJacobiSolver.h"
|
||||
#include "b3GpuJacobiSolver.h"
|
||||
#endif //TEST_OTHER_GPU_SOLVER
|
||||
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
|
||||
@@ -43,11 +43,11 @@ b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id devic
|
||||
|
||||
m_data->m_solver = new b3PgsJacobiSolver();
|
||||
b3Config config;
|
||||
m_data->m_allAabbsGPU = new btOpenCLArray<b3SapAabb>(ctx,q,config.m_maxConvexBodies);
|
||||
m_data->m_overlappingPairsGPU = new btOpenCLArray<btBroadphasePair>(ctx,q,config.m_maxBroadphasePairs);
|
||||
m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx,q,config.m_maxConvexBodies);
|
||||
m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx,q,config.m_maxBroadphasePairs);
|
||||
|
||||
#ifdef TEST_OTHER_GPU_SOLVER
|
||||
m_data->m_solver3 = new btGpuJacobiSolver(ctx,device,q,config.m_maxBroadphasePairs);
|
||||
m_data->m_solver3 = new b3GpuJacobiSolver(ctx,device,q,config.m_maxBroadphasePairs);
|
||||
#endif // TEST_OTHER_GPU_SOLVER
|
||||
|
||||
m_data->m_solver2 = new b3GpuBatchingPgsSolver(ctx,device,q,config.m_maxBroadphasePairs);
|
||||
@@ -61,16 +61,16 @@ b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id devic
|
||||
|
||||
{
|
||||
cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,integrateKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/integrateKernel.cl");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,integrateKernelCL, "integrateTransformsKernel",&errNum,prog);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
clReleaseProgram(prog);
|
||||
}
|
||||
{
|
||||
cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,updateAabbsKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl");
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,updateAabbsKernelCL, "initializeGpuAabbsFull",&errNum,prog);
|
||||
btAssert(errNum==CL_SUCCESS);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
clReleaseProgram(prog);
|
||||
}
|
||||
|
||||
@@ -116,11 +116,11 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
if (useDbvt)
|
||||
{
|
||||
{
|
||||
BT_PROFILE("setAabb");
|
||||
B3_PROFILE("setAabb");
|
||||
m_data->m_allAabbsGPU->copyToHost(m_data->m_allAabbsCPU);
|
||||
for (int i=0;i<m_data->m_allAabbsCPU.size();i++)
|
||||
{
|
||||
btBroadphaseProxy* proxy = &m_data->m_broadphaseDbvt->m_proxies[i];
|
||||
b3BroadphaseProxy* proxy = &m_data->m_broadphaseDbvt->m_proxies[i];
|
||||
b3Vector3 aabbMin(m_data->m_allAabbsCPU[i].m_min[0],m_data->m_allAabbsCPU[i].m_min[1],m_data->m_allAabbsCPU[i].m_min[2]);
|
||||
b3Vector3 aabbMax(m_data->m_allAabbsCPU[i].m_max[0],m_data->m_allAabbsCPU[i].m_max[1],m_data->m_allAabbsCPU[i].m_max[2]);
|
||||
m_data->m_broadphaseDbvt->setAabb(proxy,aabbMin,aabbMax,0);
|
||||
@@ -128,7 +128,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("calculateOverlappingPairs");
|
||||
B3_PROFILE("calculateOverlappingPairs");
|
||||
m_data->m_broadphaseDbvt->calculateOverlappingPairs();
|
||||
}
|
||||
numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs();
|
||||
@@ -153,7 +153,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
cl_mem aabbsWS =0;
|
||||
if (useDbvt)
|
||||
{
|
||||
BT_PROFILE("m_overlappingPairsGPU->copyFromHost");
|
||||
B3_PROFILE("m_overlappingPairsGPU->copyFromHost");
|
||||
m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
|
||||
pairs = m_data->m_overlappingPairsGPU->getBufferCL();
|
||||
aabbsWS = m_data->m_allAabbsGPU->getBufferCL();
|
||||
@@ -190,11 +190,11 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
|
||||
//solve constraints
|
||||
|
||||
btOpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_queue,0,true);
|
||||
b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_queue,0,true);
|
||||
gpuBodies.setFromOpenCLBuffer(m_data->m_narrowphase->getBodiesGpu(),m_data->m_narrowphase->getNumBodiesGpu());
|
||||
btOpenCLArray<btInertiaCL> gpuInertias(m_data->m_context,m_data->m_queue,0,true);
|
||||
b3OpenCLArray<b3InertiaCL> gpuInertias(m_data->m_context,m_data->m_queue,0,true);
|
||||
gpuInertias.setFromOpenCLBuffer(m_data->m_narrowphase->getBodyInertiasGpu(),m_data->m_narrowphase->getNumBodiesGpu());
|
||||
btOpenCLArray<b3Contact4> gpuContacts(m_data->m_context,m_data->m_queue,0,true);
|
||||
b3OpenCLArray<b3Contact4> gpuContacts(m_data->m_context,m_data->m_queue,0,true);
|
||||
gpuContacts.setFromOpenCLBuffer(m_data->m_narrowphase->getContactsGpu(),m_data->m_narrowphase->getNumContactsGpu());
|
||||
|
||||
if (useBullet2CpuSolver)
|
||||
@@ -202,7 +202,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
|
||||
b3AlignedObjectArray<b3RigidBodyCL> hostBodies;
|
||||
gpuBodies.copyToHost(hostBodies);
|
||||
b3AlignedObjectArray<btInertiaCL> hostInertias;
|
||||
b3AlignedObjectArray<b3InertiaCL> hostInertias;
|
||||
gpuInertias.copyToHost(hostInertias);
|
||||
b3AlignedObjectArray<b3Contact4> hostContacts;
|
||||
gpuContacts.copyToHost(hostContacts);
|
||||
@@ -230,36 +230,36 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
if (forceHost)
|
||||
{
|
||||
b3AlignedObjectArray<b3RigidBodyCL> hostBodies;
|
||||
b3AlignedObjectArray<btInertiaCL> hostInertias;
|
||||
b3AlignedObjectArray<b3InertiaCL> hostInertias;
|
||||
b3AlignedObjectArray<b3Contact4> hostContacts;
|
||||
|
||||
{
|
||||
BT_PROFILE("copyToHost");
|
||||
B3_PROFILE("copyToHost");
|
||||
gpuBodies.copyToHost(hostBodies);
|
||||
gpuInertias.copyToHost(hostInertias);
|
||||
gpuContacts.copyToHost(hostContacts);
|
||||
}
|
||||
|
||||
{
|
||||
btJacobiSolverInfo solverInfo;
|
||||
b3JacobiSolverInfo solverInfo;
|
||||
m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(),&hostContacts[0],hostContacts.size(),0,0,solverInfo);
|
||||
|
||||
|
||||
}
|
||||
{
|
||||
BT_PROFILE("copyFromHost");
|
||||
B3_PROFILE("copyFromHost");
|
||||
gpuBodies.copyFromHost(hostBodies);
|
||||
}
|
||||
} else
|
||||
{
|
||||
btJacobiSolverInfo solverInfo;
|
||||
b3JacobiSolverInfo solverInfo;
|
||||
m_data->m_solver3->solveGroup(&gpuBodies, &gpuInertias, &gpuContacts,solverInfo);
|
||||
}
|
||||
} else
|
||||
{
|
||||
b3AlignedObjectArray<b3RigidBodyCL> hostBodies;
|
||||
gpuBodies.copyToHost(hostBodies);
|
||||
b3AlignedObjectArray<btInertiaCL> hostInertias;
|
||||
b3AlignedObjectArray<b3InertiaCL> hostInertias;
|
||||
gpuInertias.copyToHost(hostInertias);
|
||||
b3AlignedObjectArray<b3Contact4> hostContacts;
|
||||
gpuContacts.copyToHost(hostContacts);
|
||||
@@ -279,9 +279,9 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
|
||||
|
||||
/*m_data->m_solver3->solveContactConstraintHost(
|
||||
(btOpenCLArray<RigidBodyBase::Body>*)&gpuBodies,
|
||||
(btOpenCLArray<RigidBodyBase::Inertia>*)&gpuInertias,
|
||||
(btOpenCLArray<Constraint4>*) &gpuContacts,
|
||||
(b3OpenCLArray<RigidBodyBase::Body>*)&gpuBodies,
|
||||
(b3OpenCLArray<RigidBodyBase::Inertia>*)&gpuInertias,
|
||||
(b3OpenCLArray<Constraint4>*) &gpuContacts,
|
||||
0,numContacts,256);
|
||||
*/
|
||||
}
|
||||
@@ -295,7 +295,7 @@ void b3GpuRigidBodyPipeline::integrate(float timeStep)
|
||||
{
|
||||
//integrate
|
||||
|
||||
btLauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel);
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel);
|
||||
launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu());
|
||||
int numBodies = m_data->m_narrowphase->getNumBodiesGpu();
|
||||
launcher.setConst(numBodies);
|
||||
@@ -319,8 +319,8 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
|
||||
if (!numBodies)
|
||||
return;
|
||||
|
||||
//__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)
|
||||
btLauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel);
|
||||
//__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB)
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel);
|
||||
launcher.setConst(numBodies);
|
||||
cl_mem bodies = m_data->m_narrowphase->getBodiesGpu();
|
||||
launcher.setBuffer(bodies);
|
||||
@@ -379,7 +379,7 @@ int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* po
|
||||
t.setIdentity();
|
||||
t.setOrigin(b3Vector3(position[0],position[1],position[2]));
|
||||
t.setRotation(b3Quaternion(orientation[0],orientation[1],orientation[2],orientation[3]));
|
||||
btTransformAabb(localAabbMin,localAabbMax, margin,t,aabbMin,aabbMax);
|
||||
b3TransformAabb(localAabbMin,localAabbMax, margin,t,aabbMin,aabbMax);
|
||||
if (useDbvt)
|
||||
{
|
||||
m_data->m_broadphaseDbvt->createProxy(aabbMin,aabbMax,bodyIndex,0,1,1);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BT_GPU_RIGIDBODY_PIPELINE_H
|
||||
#define BT_GPU_RIGIDBODY_PIPELINE_H
|
||||
#ifndef B3_GPU_RIGIDBODY_PIPELINE_H
|
||||
#define B3_GPU_RIGIDBODY_PIPELINE_H
|
||||
|
||||
#include "../../basic_initialize/b3OpenCLInclude.h"
|
||||
|
||||
@@ -27,7 +27,7 @@ public:
|
||||
//int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant);
|
||||
|
||||
//int registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, const float* scaling);
|
||||
//int registerCompoundShape(b3AlignedObjectArray<btGpuChildShape>* childShapes);
|
||||
//int registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes);
|
||||
|
||||
|
||||
int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, int userData, bool writeInstanceToGpu);
|
||||
@@ -42,4 +42,4 @@ public:
|
||||
|
||||
};
|
||||
|
||||
#endif //BT_GPU_RIGIDBODY_PIPELINE_H
|
||||
#endif //B3_GPU_RIGIDBODY_PIPELINE_H
|
||||
@@ -1,10 +1,10 @@
|
||||
#ifndef BT_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H
|
||||
#define BT_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H
|
||||
#ifndef B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H
|
||||
#define B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H
|
||||
|
||||
#include "../../basic_initialize/b3OpenCLInclude.h"
|
||||
#include "Bullet3Common/b3AlignedObjectArray.h"
|
||||
|
||||
#include "../../parallel_primitives/host/btOpenCLArray.h"
|
||||
#include "../../parallel_primitives/host/b3OpenCLArray.h"
|
||||
#include "../../gpu_narrowphase/host/b3Collidable.h"
|
||||
|
||||
#include "gpu_broadphase/host/b3SapAabb.h"
|
||||
@@ -26,19 +26,19 @@ struct b3GpuRigidBodyPipelineInternalData
|
||||
|
||||
class b3PgsJacobiSolver* m_solver;
|
||||
class b3GpuBatchingPgsSolver* m_solver2;
|
||||
class btGpuJacobiSolver* m_solver3;
|
||||
class b3GpuJacobiSolver* m_solver3;
|
||||
|
||||
class b3GpuSapBroadphase* m_broadphaseSap;
|
||||
|
||||
class b3DynamicBvhBroadphase* m_broadphaseDbvt;
|
||||
btOpenCLArray<b3SapAabb>* m_allAabbsGPU;
|
||||
b3OpenCLArray<b3SapAabb>* m_allAabbsGPU;
|
||||
b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU;
|
||||
btOpenCLArray<btBroadphasePair>* m_overlappingPairsGPU;
|
||||
b3OpenCLArray<b3BroadphasePair>* m_overlappingPairsGPU;
|
||||
|
||||
b3AlignedObjectArray<b3TypedConstraint*> m_joints;
|
||||
class b3GpuNarrowPhase* m_narrowphase;
|
||||
|
||||
};
|
||||
|
||||
#endif //BT_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H
|
||||
#endif //B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ bool useNewBatchingKernel = true;
|
||||
|
||||
|
||||
#include "Bullet3Common/b3Quickprof.h"
|
||||
#include "../../parallel_primitives/host/btLauncherCL.h"
|
||||
#include "../../parallel_primitives/host/b3LauncherCL.h"
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
|
||||
struct SolverDebugInfo
|
||||
@@ -80,8 +80,8 @@ class SolverDeviceInl
|
||||
public:
|
||||
struct ParallelSolveData
|
||||
{
|
||||
btOpenCLArray<unsigned int>* m_numConstraints;
|
||||
btOpenCLArray<unsigned int>* m_offsets;
|
||||
b3OpenCLArray<unsigned int>* m_numConstraints;
|
||||
b3OpenCLArray<unsigned int>* m_offsets;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -93,19 +93,19 @@ b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue,
|
||||
m_device(device),
|
||||
m_queue(queue)
|
||||
{
|
||||
m_sort32 = new btRadixSort32CL(ctx,device,queue);
|
||||
m_scan = new btPrefixScanCL(ctx,device,queue,N_SPLIT*N_SPLIT);
|
||||
m_search = new btBoundSearchCL(ctx,device,queue,N_SPLIT*N_SPLIT);
|
||||
m_sort32 = new b3RadixSort32CL(ctx,device,queue);
|
||||
m_scan = new b3PrefixScanCL(ctx,device,queue,N_SPLIT*N_SPLIT);
|
||||
m_search = new b3BoundSearchCL(ctx,device,queue,N_SPLIT*N_SPLIT);
|
||||
|
||||
const int sortSize = BTNEXTMULTIPLEOF( pairCapacity, 512 );
|
||||
const int sortSize = B3NEXTMULTIPLEOF( pairCapacity, 512 );
|
||||
|
||||
m_sortDataBuffer = new btOpenCLArray<btSortData>(ctx,queue,sortSize);
|
||||
m_contactBuffer2 = new btOpenCLArray<b3Contact4>(ctx,queue);
|
||||
m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx,queue,sortSize);
|
||||
m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(ctx,queue);
|
||||
|
||||
m_numConstraints = new btOpenCLArray<unsigned int>(ctx,queue,N_SPLIT*N_SPLIT );
|
||||
m_numConstraints = new b3OpenCLArray<unsigned int>(ctx,queue,N_SPLIT*N_SPLIT );
|
||||
m_numConstraints->resize(N_SPLIT*N_SPLIT);
|
||||
|
||||
m_offsets = new btOpenCLArray<unsigned int>( ctx,queue, N_SPLIT*N_SPLIT );
|
||||
m_offsets = new b3OpenCLArray<unsigned int>( ctx,queue, N_SPLIT*N_SPLIT );
|
||||
m_offsets->resize(N_SPLIT*N_SPLIT);
|
||||
const char* additionalMacros = "";
|
||||
const char* srcFileNameForCaching="";
|
||||
@@ -126,54 +126,54 @@ b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue,
|
||||
{
|
||||
|
||||
cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, SOLVER_CONTACT_KERNEL_PATH);
|
||||
btAssert(solveContactProg);
|
||||
b3Assert(solveContactProg);
|
||||
|
||||
cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, SOLVER_FRICTION_KERNEL_PATH);
|
||||
btAssert(solveFrictionProg);
|
||||
b3Assert(solveFrictionProg);
|
||||
|
||||
cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, SOLVER_SETUP2_KERNEL_PATH);
|
||||
btAssert(solverSetup2Prog);
|
||||
b3Assert(solverSetup2Prog);
|
||||
|
||||
|
||||
cl_program solverSetupProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, SOLVER_SETUP_KERNEL_PATH);
|
||||
btAssert(solverSetupProg);
|
||||
b3Assert(solverSetupProg);
|
||||
|
||||
|
||||
m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
|
||||
btAssert(m_solveFrictionKernel);
|
||||
b3Assert(m_solveFrictionKernel);
|
||||
|
||||
m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
|
||||
btAssert(m_solveContactKernel);
|
||||
b3Assert(m_solveContactKernel);
|
||||
|
||||
m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
|
||||
btAssert(m_contactToConstraintKernel);
|
||||
b3Assert(m_contactToConstraintKernel);
|
||||
|
||||
m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
|
||||
btAssert(m_setSortDataKernel);
|
||||
b3Assert(m_setSortDataKernel);
|
||||
|
||||
m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
|
||||
btAssert(m_reorderContactKernel);
|
||||
b3Assert(m_reorderContactKernel);
|
||||
|
||||
|
||||
m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
|
||||
btAssert(m_copyConstraintKernel);
|
||||
b3Assert(m_copyConstraintKernel);
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH);
|
||||
btAssert(batchingProg);
|
||||
b3Assert(batchingProg);
|
||||
|
||||
m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
|
||||
btAssert(m_batchingKernel);
|
||||
b3Assert(m_batchingKernel);
|
||||
}
|
||||
{
|
||||
cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
|
||||
btAssert(batchingNewProg);
|
||||
b3Assert(batchingNewProg);
|
||||
|
||||
m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
|
||||
//m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros );
|
||||
btAssert(m_batchingKernelNew);
|
||||
b3Assert(m_batchingKernelNew);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -204,9 +204,9 @@ b3Solver::~b3Solver()
|
||||
|
||||
|
||||
|
||||
/*void b3Solver::reorderConvertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
const btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||
btOpenCLArray<b3Contact4>* contactsIn, btOpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData,
|
||||
/*void b3Solver::reorderConvertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||
b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData,
|
||||
int nContacts, const b3Solver::ConstraintCfg& cfg )
|
||||
{
|
||||
if( m_contactBuffer )
|
||||
@@ -215,8 +215,8 @@ b3Solver::~b3Solver()
|
||||
}
|
||||
if( m_contactBuffer == 0 )
|
||||
{
|
||||
BT_PROFILE("new m_contactBuffer;");
|
||||
m_contactBuffer = new btOpenCLArray<b3Contact4>(m_context,m_queue,nContacts );
|
||||
B3_PROFILE("new m_contactBuffer;");
|
||||
m_contactBuffer = new b3OpenCLArray<b3Contact4>(m_context,m_queue,nContacts );
|
||||
m_contactBuffer->resize(nContacts);
|
||||
}
|
||||
|
||||
@@ -233,47 +233,47 @@ b3Solver::~b3Solver()
|
||||
|
||||
// contactsIn -> m_contactBuffer
|
||||
{
|
||||
BT_PROFILE("sortContacts");
|
||||
B3_PROFILE("sortContacts");
|
||||
sortContacts( bodyBuf, contactsIn, additionalData, nContacts, cfg );
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("m_copyConstraintKernel");
|
||||
B3_PROFILE("m_copyConstraintKernel");
|
||||
|
||||
|
||||
|
||||
btInt4 cdata; cdata.x = nContacts;
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_contactBuffer->getBufferCL() ), btBufferInfoCL( contactsIn->getBufferCL() ) };
|
||||
// btLauncherCL launcher( m_queue, data->m_device->getKernel( PATH, "CopyConstraintKernel", "-I ..\\..\\ -Wf,--c++", 0 ) );
|
||||
btLauncherCL launcher( m_queue, m_copyConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3Int4 cdata; cdata.x = nContacts;
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_contactBuffer->getBufferCL() ), b3BufferInfoCL( contactsIn->getBufferCL() ) };
|
||||
// b3LauncherCL launcher( m_queue, data->m_device->getKernel( PATH, "CopyConstraintKernel", "-I ..\\..\\ -Wf,--c++", 0 ) );
|
||||
b3LauncherCL launcher( m_queue, m_copyConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("batchContacts");
|
||||
B3_PROFILE("batchContacts");
|
||||
b3Solver::batchContacts( contactsIn, nContacts, m_numConstraints, m_offsets, cfg.m_staticIdx );
|
||||
|
||||
}
|
||||
}
|
||||
{
|
||||
BT_PROFILE("waitForCompletion (batchContacts)");
|
||||
B3_PROFILE("waitForCompletion (batchContacts)");
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//================
|
||||
|
||||
{
|
||||
BT_PROFILE("convertToConstraints");
|
||||
B3_PROFILE("convertToConstraints");
|
||||
b3Solver::convertToConstraints( bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg );
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("convertToConstraints waitForCompletion");
|
||||
B3_PROFILE("convertToConstraints waitForCompletion");
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
@@ -285,7 +285,7 @@ b3Solver::~b3Solver()
|
||||
float calcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1,
|
||||
const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1)
|
||||
{
|
||||
return btDot(l0, linVel0) + btDot(a0, angVel0) + btDot(l1, linVel1) + btDot(a1, angVel1);
|
||||
return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1);
|
||||
}
|
||||
|
||||
|
||||
@@ -295,8 +295,8 @@ b3Solver::~b3Solver()
|
||||
b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1)
|
||||
{
|
||||
linear = -n;
|
||||
angular0 = -btCross(r0, n);
|
||||
angular1 = btCross(r1, n);
|
||||
angular0 = -b3Cross(r0, n);
|
||||
angular1 = b3Cross(r1, n);
|
||||
}
|
||||
|
||||
|
||||
@@ -333,8 +333,8 @@ void solveContact(b3GpuConstraint4& cs,
|
||||
float prevSum = cs.m_appliedRambdaDt[ic];
|
||||
float updated = prevSum;
|
||||
updated += rambdaDt;
|
||||
updated = btMax( updated, minRambdaDt[ic] );
|
||||
updated = btMin( updated, maxRambdaDt[ic] );
|
||||
updated = b3Max( updated, minRambdaDt[ic] );
|
||||
updated = b3Min( updated, maxRambdaDt[ic] );
|
||||
rambdaDt = updated - prevSum;
|
||||
cs.m_appliedRambdaDt[ic] = updated;
|
||||
}
|
||||
@@ -344,8 +344,8 @@ void solveContact(b3GpuConstraint4& cs,
|
||||
b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
|
||||
b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
|
||||
#ifdef _WIN32
|
||||
btAssert(_finite(linImp0.getX()));
|
||||
btAssert(_finite(linImp1.getX()));
|
||||
b3Assert(_finite(linImp0.getX()));
|
||||
b3Assert(_finite(linImp1.getX()));
|
||||
#endif
|
||||
if( JACOBI )
|
||||
{
|
||||
@@ -393,7 +393,7 @@ void solveContact(b3GpuConstraint4& cs,
|
||||
|
||||
b3Vector3 tangent[2];
|
||||
#if 1
|
||||
btPlaneSpace1 (n, tangent[0],tangent[1]);
|
||||
b3PlaneSpace1 (n, tangent[0],tangent[1]);
|
||||
#else
|
||||
b3Vector3 r = cs.m_worldPos[0]-center;
|
||||
tangent[0] = cross3( n, r );
|
||||
@@ -416,8 +416,8 @@ void solveContact(b3GpuConstraint4& cs,
|
||||
float prevSum = cs.m_fAppliedRambdaDt[i];
|
||||
float updated = prevSum;
|
||||
updated += rambdaDt;
|
||||
updated = btMax( updated, minRambdaDt[i] );
|
||||
updated = btMin( updated, maxRambdaDt[i] );
|
||||
updated = b3Max( updated, minRambdaDt[i] );
|
||||
updated = b3Min( updated, maxRambdaDt[i] );
|
||||
rambdaDt = updated - prevSum;
|
||||
cs.m_fAppliedRambdaDt[i] = updated;
|
||||
}
|
||||
@@ -427,8 +427,8 @@ void solveContact(b3GpuConstraint4& cs,
|
||||
b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
|
||||
b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
|
||||
#ifdef _WIN32
|
||||
btAssert(_finite(linImp0.getX()));
|
||||
btAssert(_finite(linImp1.getX()));
|
||||
b3Assert(_finite(linImp0.getX()));
|
||||
b3Assert(_finite(linImp1.getX()));
|
||||
#endif
|
||||
linVelA += linImp0;
|
||||
angVelA += angImp0;
|
||||
@@ -439,10 +439,10 @@ void solveContact(b3GpuConstraint4& cs,
|
||||
{ // angular damping for point constraint
|
||||
b3Vector3 ab = ( posB - posA ).normalized();
|
||||
b3Vector3 ac = ( center - posA ).normalized();
|
||||
if( btDot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
|
||||
if( b3Dot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
|
||||
{
|
||||
float angNA = btDot( n, angVelA );
|
||||
float angNB = btDot( n, angVelB );
|
||||
float angNA = b3Dot( n, angVelA );
|
||||
float angNB = b3Dot( n, angVelB );
|
||||
|
||||
angVelA -= (angNA*0.1f)*n;
|
||||
angVelB -= (angNB*0.1f)*n;
|
||||
@@ -454,7 +454,7 @@ void solveContact(b3GpuConstraint4& cs,
|
||||
|
||||
struct SolveTask// : public ThreadPool::Task
|
||||
{
|
||||
SolveTask(b3AlignedObjectArray<b3RigidBodyCL>& bodies, b3AlignedObjectArray<btInertiaCL>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints,
|
||||
SolveTask(b3AlignedObjectArray<b3RigidBodyCL>& bodies, b3AlignedObjectArray<b3InertiaCL>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints,
|
||||
int start, int nConstraints)
|
||||
: m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ),
|
||||
m_solveFriction( true ){}
|
||||
@@ -513,7 +513,7 @@ struct SolveTask// : public ThreadPool::Task
|
||||
}
|
||||
|
||||
b3AlignedObjectArray<b3RigidBodyCL>& m_bodies;
|
||||
b3AlignedObjectArray<btInertiaCL>& m_shapes;
|
||||
b3AlignedObjectArray<b3InertiaCL>& m_shapes;
|
||||
b3AlignedObjectArray<b3GpuConstraint4>& m_constraints;
|
||||
int m_start;
|
||||
int m_nConstraints;
|
||||
@@ -521,13 +521,13 @@ struct SolveTask// : public ThreadPool::Task
|
||||
};
|
||||
|
||||
|
||||
void b3Solver::solveContactConstraintHost( btOpenCLArray<b3RigidBodyCL>* bodyBuf, btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||
btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
|
||||
void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBuf, b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
|
||||
{
|
||||
|
||||
b3AlignedObjectArray<b3RigidBodyCL> bodyNative;
|
||||
bodyBuf->copyToHost(bodyNative);
|
||||
b3AlignedObjectArray<btInertiaCL> shapeNative;
|
||||
b3AlignedObjectArray<b3InertiaCL> shapeNative;
|
||||
shapeBuf->copyToHost(shapeNative);
|
||||
b3AlignedObjectArray<b3GpuConstraint4> constraintNative;
|
||||
constraint->copyToHost(constraintNative);
|
||||
@@ -553,12 +553,12 @@ void b3Solver::solveContactConstraintHost( btOpenCLArray<b3RigidBodyCL>* bodyBu
|
||||
|
||||
}
|
||||
|
||||
void b3Solver::solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||
btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
|
||||
void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
|
||||
{
|
||||
|
||||
|
||||
btInt4 cdata = btMakeInt4( n, 0, 0, 0 );
|
||||
b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 );
|
||||
{
|
||||
|
||||
const int nn = N_SPLIT*N_SPLIT;
|
||||
@@ -570,14 +570,14 @@ void b3Solver::solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* body
|
||||
int numWorkItems = 64*nn/N_BATCHES;
|
||||
#ifdef DEBUG_ME
|
||||
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
||||
adl::btOpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
{
|
||||
|
||||
BT_PROFILE("m_batchSolveKernel iterations");
|
||||
B3_PROFILE("m_batchSolveKernel iterations");
|
||||
for(int iter=0; iter<m_nIterations; iter++)
|
||||
{
|
||||
for(int ib=0; ib<N_BATCHES; ib++)
|
||||
@@ -591,24 +591,24 @@ void b3Solver::solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* body
|
||||
cdata.z = ib;
|
||||
cdata.w = N_SPLIT;
|
||||
|
||||
btLauncherCL launcher( m_queue, m_solveContactKernel );
|
||||
b3LauncherCL launcher( m_queue, m_solveContactKernel );
|
||||
#if 1
|
||||
|
||||
btBufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
|
||||
btBufferInfoCL( bodyBuf->getBufferCL() ),
|
||||
btBufferInfoCL( shapeBuf->getBufferCL() ),
|
||||
btBufferInfoCL( constraint->getBufferCL() ),
|
||||
btBufferInfoCL( m_numConstraints->getBufferCL() ),
|
||||
btBufferInfoCL( m_offsets->getBufferCL() )
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL() ),
|
||||
b3BufferInfoCL( shapeBuf->getBufferCL() ),
|
||||
b3BufferInfoCL( constraint->getBufferCL() ),
|
||||
b3BufferInfoCL( m_numConstraints->getBufferCL() ),
|
||||
b3BufferInfoCL( m_offsets->getBufferCL() )
|
||||
#ifdef DEBUG_ME
|
||||
, btBufferInfoCL(&gpuDebugInfo)
|
||||
, b3BufferInfoCL(&gpuDebugInfo)
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
//launcher.setConst( cdata.x );
|
||||
launcher.setConst( cdata.y );
|
||||
launcher.setConst( cdata.z );
|
||||
@@ -673,7 +673,7 @@ void b3Solver::solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* body
|
||||
bool applyFriction=true;
|
||||
if (applyFriction)
|
||||
{
|
||||
BT_PROFILE("m_batchSolveKernel iterations2");
|
||||
B3_PROFILE("m_batchSolveKernel iterations2");
|
||||
for(int iter=0; iter<m_nIterations; iter++)
|
||||
{
|
||||
for(int ib=0; ib<N_BATCHES; ib++)
|
||||
@@ -681,18 +681,18 @@ void b3Solver::solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* body
|
||||
cdata.z = ib;
|
||||
cdata.w = N_SPLIT;
|
||||
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( bodyBuf->getBufferCL() ),
|
||||
btBufferInfoCL( shapeBuf->getBufferCL() ),
|
||||
btBufferInfoCL( constraint->getBufferCL() ),
|
||||
btBufferInfoCL( m_numConstraints->getBufferCL() ),
|
||||
btBufferInfoCL( m_offsets->getBufferCL() )
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( bodyBuf->getBufferCL() ),
|
||||
b3BufferInfoCL( shapeBuf->getBufferCL() ),
|
||||
b3BufferInfoCL( constraint->getBufferCL() ),
|
||||
b3BufferInfoCL( m_numConstraints->getBufferCL() ),
|
||||
b3BufferInfoCL( m_offsets->getBufferCL() )
|
||||
#ifdef DEBUG_ME
|
||||
,btBufferInfoCL(&gpuDebugInfo)
|
||||
,b3BufferInfoCL(&gpuDebugInfo)
|
||||
#endif //DEBUG_ME
|
||||
};
|
||||
btLauncherCL launcher( m_queue, m_solveFrictionKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_queue, m_solveFrictionKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
//launcher.setConst( cdata.x );
|
||||
launcher.setConst( cdata.y );
|
||||
launcher.setConst( cdata.z );
|
||||
@@ -712,12 +712,12 @@ void b3Solver::solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* body
|
||||
|
||||
}
|
||||
|
||||
void b3Solver::convertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
const btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||
btOpenCLArray<b3Contact4>* contactsIn, btOpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData,
|
||||
void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||
b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData,
|
||||
int nContacts, const ConstraintCfg& cfg )
|
||||
{
|
||||
btOpenCLArray<b3GpuConstraint4>* constraintNative =0;
|
||||
b3OpenCLArray<b3GpuConstraint4>* constraintNative =0;
|
||||
|
||||
struct CB
|
||||
{
|
||||
@@ -728,7 +728,7 @@ void b3Solver::convertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf
|
||||
};
|
||||
|
||||
{
|
||||
BT_PROFILE("m_contactToConstraintKernel");
|
||||
B3_PROFILE("m_contactToConstraintKernel");
|
||||
CB cdata;
|
||||
cdata.m_nContacts = nContacts;
|
||||
cdata.m_dt = cfg.m_dt;
|
||||
@@ -736,10 +736,10 @@ void b3Solver::convertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf
|
||||
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
|
||||
|
||||
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( contactsIn->getBufferCL() ), btBufferInfoCL( bodyBuf->getBufferCL() ), btBufferInfoCL( shapeBuf->getBufferCL()),
|
||||
btBufferInfoCL( contactCOut->getBufferCL() )};
|
||||
btLauncherCL launcher( m_queue, m_contactToConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()),
|
||||
b3BufferInfoCL( contactCOut->getBufferCL() )};
|
||||
b3LauncherCL launcher( m_queue, m_contactToConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
//launcher.setConst( cdata );
|
||||
|
||||
launcher.setConst(cdata.m_nContacts);
|
||||
@@ -756,8 +756,8 @@ void b3Solver::convertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf
|
||||
}
|
||||
|
||||
/*
|
||||
void b3Solver::sortContacts( const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
btOpenCLArray<b3Contact4>* contactsIn, void* additionalData,
|
||||
void b3Solver::sortContacts( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
b3OpenCLArray<b3Contact4>* contactsIn, void* additionalData,
|
||||
int nContacts, const b3Solver::ConstraintCfg& cfg )
|
||||
{
|
||||
|
||||
@@ -770,8 +770,8 @@ void b3Solver::sortContacts( const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
|
||||
int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment );
|
||||
|
||||
btOpenCLArray<unsigned int>* countsNative = m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost );
|
||||
btOpenCLArray<unsigned int>* offsetsNative = m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost );
|
||||
b3OpenCLArray<unsigned int>* countsNative = m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost );
|
||||
b3OpenCLArray<unsigned int>* offsetsNative = m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost );
|
||||
|
||||
{ // 2. set cell idx
|
||||
struct CB
|
||||
@@ -782,7 +782,7 @@ void b3Solver::sortContacts( const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
int m_nSplit;
|
||||
};
|
||||
|
||||
btAssert( sortSize%64 == 0 );
|
||||
b3Assert( sortSize%64 == 0 );
|
||||
CB cdata;
|
||||
cdata.m_nContacts = nContacts;
|
||||
cdata.m_staticIdx = cfg.m_staticIdx;
|
||||
@@ -790,9 +790,9 @@ void b3Solver::sortContacts( const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
cdata.m_nSplit = N_SPLIT;
|
||||
|
||||
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( contactsIn->getBufferCL() ), btBufferInfoCL( bodyBuf->getBufferCL() ), btBufferInfoCL( m_sortDataBuffer->getBufferCL() ) };
|
||||
btLauncherCL launcher( m_queue, m_setSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) };
|
||||
b3LauncherCL launcher( m_queue, m_setSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( sortSize, 64 );
|
||||
}
|
||||
@@ -805,23 +805,23 @@ void b3Solver::sortContacts( const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
m_sort32->execute(*m_sortDataBuffer,sortSize);
|
||||
}
|
||||
{ // 4. find entries
|
||||
m_search->execute( *m_sortDataBuffer, nContacts, *countsNative, N_SPLIT*N_SPLIT, btBoundSearchCL::COUNT);
|
||||
m_search->execute( *m_sortDataBuffer, nContacts, *countsNative, N_SPLIT*N_SPLIT, b3BoundSearchCL::COUNT);
|
||||
|
||||
m_scan->execute( *countsNative, *offsetsNative, N_SPLIT*N_SPLIT );
|
||||
}
|
||||
|
||||
{ // 5. sort constraints by cellIdx
|
||||
// todo. preallocate this
|
||||
// btAssert( contactsIn->getType() == TYPE_HOST );
|
||||
// btOpenCLArray<b3Contact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn ); // copying contacts to this buffer
|
||||
// b3Assert( contactsIn->getType() == TYPE_HOST );
|
||||
// b3OpenCLArray<b3Contact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn ); // copying contacts to this buffer
|
||||
|
||||
{
|
||||
|
||||
|
||||
btInt4 cdata; cdata.x = nContacts;
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( contactsIn->getBufferCL() ), btBufferInfoCL( m_contactBuffer->getBufferCL() ), btBufferInfoCL( m_sortDataBuffer->getBufferCL() ) };
|
||||
btLauncherCL launcher( m_queue, m_reorderContactKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3Int4 cdata; cdata.x = nContacts;
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( m_contactBuffer->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) };
|
||||
b3LauncherCL launcher( m_queue, m_reorderContactKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
}
|
||||
@@ -834,14 +834,14 @@ void b3Solver::sortContacts( const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
|
||||
*/
|
||||
|
||||
void b3Solver::batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* nNative, btOpenCLArray<unsigned int>* offsetsNative, int staticIdx )
|
||||
void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx )
|
||||
{
|
||||
|
||||
int numWorkItems = 64*N_SPLIT*N_SPLIT;
|
||||
{
|
||||
BT_PROFILE("batch generation");
|
||||
B3_PROFILE("batch generation");
|
||||
|
||||
btInt4 cdata;
|
||||
b3Int4 cdata;
|
||||
cdata.x = nContacts;
|
||||
cdata.y = 0;
|
||||
cdata.z = staticIdx;
|
||||
@@ -849,7 +849,7 @@ void b3Solver::batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContact
|
||||
|
||||
#ifdef BATCH_DEBUG
|
||||
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
||||
adl::btOpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
||||
gpuDebugInfo.write(debugInfo,numWorkItems);
|
||||
#endif
|
||||
@@ -857,13 +857,13 @@ void b3Solver::batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContact
|
||||
|
||||
|
||||
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( contacts->getBufferCL() ),
|
||||
btBufferInfoCL( m_contactBuffer2->getBufferCL()),
|
||||
btBufferInfoCL( nNative->getBufferCL() ),
|
||||
btBufferInfoCL( offsetsNative->getBufferCL() ),
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( contacts->getBufferCL() ),
|
||||
b3BufferInfoCL( m_contactBuffer2->getBufferCL()),
|
||||
b3BufferInfoCL( nNative->getBufferCL() ),
|
||||
b3BufferInfoCL( offsetsNative->getBufferCL() ),
|
||||
#ifdef BATCH_DEBUG
|
||||
, btBufferInfoCL(&gpuDebugInfo)
|
||||
, b3BufferInfoCL(&gpuDebugInfo)
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -871,11 +871,11 @@ void b3Solver::batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContact
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("batchingKernel");
|
||||
//btLauncherCL launcher( m_queue, m_batchingKernel);
|
||||
B3_PROFILE("batchingKernel");
|
||||
//b3LauncherCL launcher( m_queue, m_batchingKernel);
|
||||
cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel;
|
||||
|
||||
btLauncherCL launcher( m_queue, k);
|
||||
b3LauncherCL launcher( m_queue, k);
|
||||
if (!useNewBatchingKernel )
|
||||
{
|
||||
launcher.setBuffer( contacts->getBufferCL() );
|
||||
@@ -927,7 +927,7 @@ void b3Solver::batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContact
|
||||
}
|
||||
|
||||
// copy buffer to buffer
|
||||
//btAssert(m_contactBuffer->size()==nContacts);
|
||||
//b3Assert(m_contactBuffer->size()==nContacts);
|
||||
//contacts->copyFromOpenCLArray( *m_contactBuffer);
|
||||
//clFinish(m_queue);//needed?
|
||||
|
||||
|
||||
@@ -17,20 +17,20 @@ subject to the following restrictions:
|
||||
#ifndef __ADL_SOLVER_H
|
||||
#define __ADL_SOLVER_H
|
||||
|
||||
#include "../../parallel_primitives/host/btOpenCLArray.h"
|
||||
#include "../../parallel_primitives/host/b3OpenCLArray.h"
|
||||
#include "../host/b3GpuConstraint4.h"
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
|
||||
|
||||
#include "../host/b3GpuConstraint4.h"
|
||||
#include "../../parallel_primitives/host/btPrefixScanCL.h"
|
||||
#include "../../parallel_primitives/host/btRadixSort32CL.h"
|
||||
#include "../../parallel_primitives/host/btBoundSearchCL.h"
|
||||
#include "../../parallel_primitives/host/b3PrefixScanCL.h"
|
||||
#include "../../parallel_primitives/host/b3RadixSort32CL.h"
|
||||
#include "../../parallel_primitives/host/b3BoundSearchCL.h"
|
||||
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
|
||||
|
||||
#define BTNEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment))
|
||||
#define B3NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment))
|
||||
|
||||
class b3SolverBase
|
||||
{
|
||||
@@ -69,8 +69,8 @@ class b3Solver : public b3SolverBase
|
||||
cl_command_queue m_queue;
|
||||
|
||||
|
||||
btOpenCLArray<unsigned int>* m_numConstraints;
|
||||
btOpenCLArray<unsigned int>* m_offsets;
|
||||
b3OpenCLArray<unsigned int>* m_numConstraints;
|
||||
b3OpenCLArray<unsigned int>* m_offsets;
|
||||
|
||||
|
||||
int m_nIterations;
|
||||
@@ -83,12 +83,12 @@ class b3Solver : public b3SolverBase
|
||||
cl_kernel m_reorderContactKernel;
|
||||
cl_kernel m_copyConstraintKernel;
|
||||
|
||||
class btRadixSort32CL* m_sort32;
|
||||
class btBoundSearchCL* m_search;
|
||||
class btPrefixScanCL* m_scan;
|
||||
class b3RadixSort32CL* m_sort32;
|
||||
class b3BoundSearchCL* m_search;
|
||||
class b3PrefixScanCL* m_scan;
|
||||
|
||||
btOpenCLArray<btSortData>* m_sortDataBuffer;
|
||||
btOpenCLArray<b3Contact4>* m_contactBuffer2;
|
||||
b3OpenCLArray<b3SortData>* m_sortDataBuffer;
|
||||
b3OpenCLArray<b3Contact4>* m_contactBuffer2;
|
||||
|
||||
enum
|
||||
{
|
||||
@@ -102,19 +102,19 @@ class b3Solver : public b3SolverBase
|
||||
|
||||
virtual ~b3Solver();
|
||||
|
||||
void solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* inertiaBuf,
|
||||
btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
|
||||
void solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* inertiaBuf,
|
||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
|
||||
|
||||
void solveContactConstraintHost( btOpenCLArray<b3RigidBodyCL>* bodyBuf, btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||
btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
|
||||
void solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBuf, b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
|
||||
|
||||
|
||||
void convertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
const btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||
btOpenCLArray<b3Contact4>* contactsIn, btOpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData,
|
||||
void convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||
b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData,
|
||||
int nContacts, const ConstraintCfg& cfg );
|
||||
|
||||
void batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
|
||||
void batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* n, b3OpenCLArray<unsigned int>* offsets, int staticIdx );
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -210,7 +210,7 @@ static const char* batchingKernelsNewCL= \
|
||||
"\n"
|
||||
" if (i!=numValidConstraints)\n"
|
||||
" {\n"
|
||||
" //btSwap(cs[i],cs[numValidConstraints]);\n"
|
||||
" //b3Swap(cs[i],cs[numValidConstraints]);\n"
|
||||
" \n"
|
||||
" Contact4 tmp = cs[i];\n"
|
||||
" cs[i] = cs[numValidConstraints];\n"
|
||||
|
||||
@@ -47,7 +47,7 @@ static const char* integrateKernelCL= \
|
||||
" integrateTransformsKernel( __global Body* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n"
|
||||
"{\n"
|
||||
" int nodeID = get_global_id(0);\n"
|
||||
" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n"
|
||||
" float B3_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n"
|
||||
" if( nodeID < numNodes && (bodies[nodeID].m_invMass != 0.f))\n"
|
||||
" {\n"
|
||||
" //angular velocity\n"
|
||||
@@ -61,9 +61,9 @@ static const char* integrateKernelCL= \
|
||||
" float4 angvel = bodies[nodeID].m_angVel;\n"
|
||||
" float fAngle = native_sqrt(dot(angvel, angvel));\n"
|
||||
" //limit the angular motion\n"
|
||||
" if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n"
|
||||
" if(fAngle*timeStep > B3_GPU_ANGULAR_MOTION_THRESHOLD)\n"
|
||||
" {\n"
|
||||
" fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n"
|
||||
" fAngle = B3_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n"
|
||||
" }\n"
|
||||
" if(fAngle < 0.001f)\n"
|
||||
" {\n"
|
||||
|
||||
@@ -313,8 +313,8 @@ static const char* solveContactCL= \
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n"
|
||||
" void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n"
|
||||
"void b3PlaneSpace1 (const float4* n, float4* p, float4* q);\n"
|
||||
" void b3PlaneSpace1 (const float4* n, float4* p, float4* q)\n"
|
||||
"{\n"
|
||||
" if (fabs(n[0].z) > 0.70710678f) {\n"
|
||||
" // choose p in y-z plane\n"
|
||||
|
||||
@@ -265,8 +265,8 @@ static const char* solveFrictionCL= \
|
||||
" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n"
|
||||
" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n"
|
||||
"}\n"
|
||||
"void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n"
|
||||
" void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n"
|
||||
"void b3PlaneSpace1 (const float4* n, float4* p, float4* q);\n"
|
||||
" void b3PlaneSpace1 (const float4* n, float4* p, float4* q)\n"
|
||||
"{\n"
|
||||
" if (fabs(n[0].z) > 0.70710678f) {\n"
|
||||
" // choose p in y-z plane\n"
|
||||
@@ -347,7 +347,7 @@ static const char* solveFrictionCL= \
|
||||
" float4 n = -cs->m_linear;\n"
|
||||
" \n"
|
||||
" float4 tangent[2];\n"
|
||||
" btPlaneSpace1(&n,&tangent[0],&tangent[1]);\n"
|
||||
" b3PlaneSpace1(&n,&tangent[0],&tangent[1]);\n"
|
||||
" float4 angular0, angular1, linear;\n"
|
||||
" float4 r0 = center - posA;\n"
|
||||
" float4 r1 = center - posB;\n"
|
||||
|
||||
@@ -489,8 +489,8 @@ static const char* solverSetupCL= \
|
||||
"} ConstBufferSSD;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void btPlaneSpace1 (float4 n, float4* p, float4* q);\n"
|
||||
" void btPlaneSpace1 (float4 n, float4* p, float4* q)\n"
|
||||
"void b3PlaneSpace1 (float4 n, float4* p, float4* q);\n"
|
||||
" void b3PlaneSpace1 (float4 n, float4* p, float4* q)\n"
|
||||
"{\n"
|
||||
" if (fabs(n.z) > 0.70710678f) {\n"
|
||||
" // choose p in y-z plane\n"
|
||||
@@ -577,7 +577,7 @@ static const char* solverSetupCL= \
|
||||
" center /= (float)src->m_worldNormal.w;\n"
|
||||
"\n"
|
||||
" float4 tangent[2];\n"
|
||||
" btPlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n"
|
||||
" b3PlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n"
|
||||
" \n"
|
||||
" float4 r[2];\n"
|
||||
" r[0] = center - posA;\n"
|
||||
|
||||
@@ -488,8 +488,8 @@ static const char* solverUtilsCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void btPlaneSpace1 (float4 n, float4* p, float4* q);\n"
|
||||
" void btPlaneSpace1 (float4 n, float4* p, float4* q)\n"
|
||||
"void b3PlaneSpace1 (float4 n, float4* p, float4* q);\n"
|
||||
" void b3PlaneSpace1 (float4 n, float4* p, float4* q)\n"
|
||||
"{\n"
|
||||
" if (fabs(n.z) > 0.70710678f) {\n"
|
||||
" // choose p in y-z plane\n"
|
||||
@@ -739,7 +739,7 @@ static const char* solverUtilsCL= \
|
||||
" float4 n = -cs->m_linear;\n"
|
||||
" \n"
|
||||
" float4 tangent[2];\n"
|
||||
" btPlaneSpace1(n,&tangent[0],&tangent[1]);\n"
|
||||
" b3PlaneSpace1(n,&tangent[0],&tangent[1]);\n"
|
||||
" float4 angular0, angular1, linear;\n"
|
||||
" float4 r0 = center - posA;\n"
|
||||
" float4 r1 = center - posB;\n"
|
||||
@@ -896,7 +896,7 @@ static const char* solverUtilsCL= \
|
||||
" center /= (float)src->m_worldNormal.w;\n"
|
||||
"\n"
|
||||
" float4 tangent[2];\n"
|
||||
" btPlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n"
|
||||
" b3PlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n"
|
||||
" \n"
|
||||
" float4 r[2];\n"
|
||||
" r[0] = center - posA;\n"
|
||||
|
||||
@@ -120,7 +120,7 @@ static const char* updateAabbsKernelCL= \
|
||||
" float fy;\n"
|
||||
" float fz;\n"
|
||||
" int uw;\n"
|
||||
"} btAABBCL;\n"
|
||||
"} b3AABBCL;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtTranspose(Matrix3x3 m)\n"
|
||||
@@ -156,7 +156,7 @@ static const char* updateAabbsKernelCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)\n"
|
||||
"__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB)\n"
|
||||
"{\n"
|
||||
" int nodeID = get_global_id(0);\n"
|
||||
" \n"
|
||||
@@ -171,8 +171,8 @@ static const char* updateAabbsKernelCL= \
|
||||
" \n"
|
||||
" if (shapeIndex>=0)\n"
|
||||
" {\n"
|
||||
" btAABBCL minAabb = plocalShapeAABB[collidableIndex*2];\n"
|
||||
" btAABBCL maxAabb = plocalShapeAABB[collidableIndex*2+1];\n"
|
||||
" b3AABBCL minAabb = plocalShapeAABB[collidableIndex*2];\n"
|
||||
" b3AABBCL maxAabb = plocalShapeAABB[collidableIndex*2+1];\n"
|
||||
" \n"
|
||||
" float4 halfExtents = ((float4)(maxAabb.fx - minAabb.fx,maxAabb.fy - minAabb.fy,maxAabb.fz - minAabb.fz,0.f))*0.5f;\n"
|
||||
" float4 localCenter = ((float4)(maxAabb.fx + minAabb.fx,maxAabb.fy + minAabb.fy,maxAabb.fz + minAabb.fz,0.f))*0.5f;\n"
|
||||
|
||||
@@ -17,10 +17,10 @@
|
||||
|
||||
|
||||
#include "b3OpenCLUtils.h"
|
||||
#include "../parallel_primitives/host/btOpenCLArray.h"
|
||||
#include "../parallel_primitives/host/btLauncherCL.h"
|
||||
#include "../parallel_primitives/host/b3OpenCLArray.h"
|
||||
#include "../parallel_primitives/host/b3LauncherCL.h"
|
||||
#include "Bullet3Common/b3Quickprof.h"
|
||||
#include "../parallel_primitives/host/btFillCL.h"
|
||||
#include "../parallel_primitives/host/b3FillCL.h"
|
||||
#include "Bullet3Common/b3CommandLineArgs.h"
|
||||
|
||||
#include <string.h>
|
||||
@@ -93,7 +93,7 @@ int main(int argc, char **argv)
|
||||
|
||||
const int mem_size = nx*ny*sizeof(float);
|
||||
const int num_elements = nx*ny;
|
||||
btClock clock;
|
||||
b3Clock clock;
|
||||
double startEvent=0.f;
|
||||
double stopEvent=0.f;
|
||||
|
||||
@@ -172,7 +172,7 @@ char flags[1024]={0};
|
||||
transposeCoalescedKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeCoalescedKernel",&ciErrNum,0,flags);
|
||||
transposeNoBankConflictsKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeNoBankConflictsKernel",&ciErrNum,0,flags);
|
||||
|
||||
btFillCL clMemSet(ctx,device,queue);
|
||||
b3FillCL clMemSet(ctx,device,queue);
|
||||
|
||||
printf("\n============================================\n");
|
||||
|
||||
@@ -184,9 +184,9 @@ char flags[1024]={0};
|
||||
float *h_tdata = (float*)malloc(mem_size);
|
||||
float *gold = (float*)malloc(mem_size);
|
||||
|
||||
btOpenCLArray<float> d_idataCL(ctx,queue);d_idataCL.resize(num_elements);
|
||||
btOpenCLArray<float> d_cdataCL(ctx,queue);d_cdataCL.resize(num_elements);
|
||||
btOpenCLArray<float> d_tdataCL(ctx,queue);d_tdataCL.resize(num_elements);
|
||||
b3OpenCLArray<float> d_idataCL(ctx,queue);d_idataCL.resize(num_elements);
|
||||
b3OpenCLArray<float> d_cdataCL(ctx,queue);d_cdataCL.resize(num_elements);
|
||||
b3OpenCLArray<float> d_tdataCL(ctx,queue);d_tdataCL.resize(num_elements);
|
||||
|
||||
|
||||
// check parameters and calculate execution configuration
|
||||
@@ -235,7 +235,7 @@ char flags[1024]={0};
|
||||
|
||||
{
|
||||
// warm up
|
||||
btLauncherCL launcher( queue, copyKernel);
|
||||
b3LauncherCL launcher( queue, copyKernel);
|
||||
launcher.setBuffer( d_cdataCL.getBufferCL());
|
||||
launcher.setBuffer( d_idataCL.getBufferCL());
|
||||
launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY );
|
||||
@@ -260,7 +260,7 @@ char flags[1024]={0};
|
||||
clMemSet.execute(d_cdataCL,0.f,num_elements);
|
||||
|
||||
{
|
||||
btLauncherCL launcher( queue, copySharedMemKernel);
|
||||
b3LauncherCL launcher( queue, copySharedMemKernel);
|
||||
launcher.setBuffer( d_cdataCL.getBufferCL());
|
||||
launcher.setBuffer( d_idataCL.getBufferCL());
|
||||
launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY );
|
||||
@@ -284,7 +284,7 @@ char flags[1024]={0};
|
||||
clMemSet.execute(d_tdataCL,0.f,num_elements);
|
||||
{
|
||||
// warmup
|
||||
btLauncherCL launcher( queue, transposeNaiveKernel);
|
||||
b3LauncherCL launcher( queue, transposeNaiveKernel);
|
||||
launcher.setBuffer( d_tdataCL.getBufferCL());
|
||||
launcher.setBuffer( d_idataCL.getBufferCL());
|
||||
launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY );
|
||||
@@ -306,7 +306,7 @@ char flags[1024]={0};
|
||||
printf("%25s", "coalesced transpose");
|
||||
clMemSet.execute(d_tdataCL,0.f,num_elements);
|
||||
{
|
||||
btLauncherCL launcher( queue, transposeCoalescedKernel);
|
||||
b3LauncherCL launcher( queue, transposeCoalescedKernel);
|
||||
launcher.setBuffer( d_tdataCL.getBufferCL());
|
||||
launcher.setBuffer( d_idataCL.getBufferCL());
|
||||
launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY );
|
||||
@@ -329,7 +329,7 @@ char flags[1024]={0};
|
||||
printf("%25s", "conflict-free transpose");
|
||||
clMemSet.execute(d_tdataCL,0.f,num_elements);
|
||||
{
|
||||
btLauncherCL launcher( queue, transposeNoBankConflictsKernel);
|
||||
b3LauncherCL launcher( queue, transposeNoBankConflictsKernel);
|
||||
launcher.setBuffer( d_tdataCL.getBufferCL());
|
||||
launcher.setBuffer( d_idataCL.getBufferCL());
|
||||
launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY );
|
||||
|
||||
@@ -21,9 +21,9 @@ function createProject(vendor)
|
||||
"test_large_problem_sorting.cpp",
|
||||
"../../basic_initialize/b3OpenCLUtils.cpp",
|
||||
"../../basic_initialize/b3OpenCLUtils.h",
|
||||
"../host/btFillCL.cpp",
|
||||
"../host/btPrefixScanCL.cpp",
|
||||
"../host/btRadixSort32CL.cpp",
|
||||
"../host/b3FillCL.cpp",
|
||||
"../host/b3PrefixScanCL.cpp",
|
||||
"../host/b3RadixSort32CL.cpp",
|
||||
"../../../src/Bullet3Common/b3AlignedAllocator.cpp",
|
||||
"../../../src/Bullet3Common/b3AlignedAllocator.h",
|
||||
"../../../src/Bullet3Common/b3AlignedObjectArray.h",
|
||||
|
||||
@@ -64,7 +64,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "../host/btRadixSort32CL.h"
|
||||
#include "../host/b3RadixSort32CL.h"
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "Bullet3Common/b3Quickprof.h"
|
||||
|
||||
@@ -119,9 +119,9 @@ void TimedSort(
|
||||
hostData[i] = h_keys[i];
|
||||
}
|
||||
|
||||
btRadixSort32CL sorter(g_cxMainContext,g_device,g_cqCommandQueue);
|
||||
b3RadixSort32CL sorter(g_cxMainContext,g_device,g_cqCommandQueue);
|
||||
|
||||
btOpenCLArray<unsigned int> gpuData(g_cxMainContext,g_cqCommandQueue);
|
||||
b3OpenCLArray<unsigned int> gpuData(g_cxMainContext,g_cqCommandQueue);
|
||||
gpuData.copyFromHost(hostData);
|
||||
//sorter.executeHost(gpuData);
|
||||
sorter.execute(gpuData);
|
||||
@@ -139,7 +139,7 @@ void TimedSort(
|
||||
// Perform the timed number of sorting iterations
|
||||
double elapsed = 0;
|
||||
float duration = 0;
|
||||
btClock watch;
|
||||
b3Clock watch;
|
||||
|
||||
//warm-start
|
||||
gpuData.copyFromHost(hostData);
|
||||
@@ -218,7 +218,7 @@ void TimedSort(
|
||||
printf("Key-values, %d iterations, %d elements\n", iterations, num_elements);
|
||||
|
||||
int max_elements = num_elements;
|
||||
b3AlignedObjectArray<btSortData> hostData;
|
||||
b3AlignedObjectArray<b3SortData> hostData;
|
||||
hostData.resize(num_elements);
|
||||
for (int i=0;i<num_elements;i++)
|
||||
{
|
||||
@@ -226,14 +226,14 @@ void TimedSort(
|
||||
hostData[i].m_value = h_values[i];
|
||||
}
|
||||
|
||||
btRadixSort32CL sorter(g_cxMainContext,g_device,g_cqCommandQueue);
|
||||
b3RadixSort32CL sorter(g_cxMainContext,g_device,g_cqCommandQueue);
|
||||
|
||||
btOpenCLArray<btSortData> gpuData(g_cxMainContext,g_cqCommandQueue);
|
||||
b3OpenCLArray<b3SortData> gpuData(g_cxMainContext,g_cqCommandQueue);
|
||||
gpuData.copyFromHost(hostData);
|
||||
//sorter.executeHost(gpuData);
|
||||
sorter.execute(gpuData);
|
||||
|
||||
b3AlignedObjectArray<btSortData> hostDataSorted;
|
||||
b3AlignedObjectArray<b3SortData> hostDataSorted;
|
||||
gpuData.copyToHost(hostDataSorted);
|
||||
#if 0
|
||||
for (int i=0;i<num_elements;i++)
|
||||
@@ -253,7 +253,7 @@ clFinish(g_cqCommandQueue);
|
||||
// Perform the timed number of sorting iterations
|
||||
double elapsed = 0;
|
||||
float duration = 0;
|
||||
btClock watch;
|
||||
b3Clock watch;
|
||||
|
||||
//warm-start
|
||||
gpuData.copyFromHost(hostData);
|
||||
@@ -649,14 +649,14 @@ int main( int argc, char** argv)
|
||||
args.GetCmdLineArgument("deviceId", gPreferredDeviceId);
|
||||
args.GetCmdLineArgument("platformId", gPreferredPlatformId);
|
||||
|
||||
printf("Initialize OpenCL using btOpenCLUtils_createContextFromType\n");
|
||||
printf("Initialize OpenCL using b3OpenCLUtils_createContextFromType\n");
|
||||
cl_platform_id platformId;
|
||||
g_cxMainContext = btOpenCLUtils_createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum, 0, 0,gPreferredDeviceId,gPreferredPlatformId,&platformId);
|
||||
// g_cxMainContext = btOpenCLUtils_createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum, 0, 0,gPreferredDeviceId,gPreferredPlatformId,&platformId);
|
||||
g_cxMainContext = b3OpenCLUtils_createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum, 0, 0,gPreferredDeviceId,gPreferredPlatformId,&platformId);
|
||||
// g_cxMainContext = b3OpenCLUtils_createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum, 0, 0,gPreferredDeviceId,gPreferredPlatformId,&platformId);
|
||||
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
int numDev = btOpenCLUtils_getNumDevices(g_cxMainContext);
|
||||
int numDev = b3OpenCLUtils_getNumDevices(g_cxMainContext);
|
||||
|
||||
if (!numDev)
|
||||
{
|
||||
@@ -665,8 +665,8 @@ int main( int argc, char** argv)
|
||||
}
|
||||
int result;
|
||||
int devId = 0;
|
||||
g_device = btOpenCLUtils_getDevice(g_cxMainContext,devId);
|
||||
btOpenCLUtils_printDeviceInfo(g_device);
|
||||
g_device = b3OpenCLUtils_getDevice(g_cxMainContext,devId);
|
||||
b3OpenCLUtils_printDeviceInfo(g_device);
|
||||
// create a command-queue
|
||||
g_cqCommandQueue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
@@ -20,12 +20,12 @@ subject to the following restrictions:
|
||||
#define KERNEL2 "SubtractKernel"
|
||||
|
||||
|
||||
#include "btBoundSearchCL.h"
|
||||
#include "b3BoundSearchCL.h"
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "btLauncherCL.h"
|
||||
#include "b3LauncherCL.h"
|
||||
#include "../kernels/BoundSearchKernelsCL.h"
|
||||
|
||||
btBoundSearchCL::btBoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize)
|
||||
b3BoundSearchCL::b3BoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize)
|
||||
:m_context(ctx),
|
||||
m_device(device),
|
||||
m_queue(queue)
|
||||
@@ -38,31 +38,31 @@ btBoundSearchCL::btBoundSearchCL(cl_context ctx, cl_device_id device, cl_command
|
||||
const char* kernelSource = boundSearchKernelsCL;
|
||||
|
||||
cl_program boundSearchProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, BOUNDSEARCH_PATH);
|
||||
btAssert(boundSearchProg);
|
||||
b3Assert(boundSearchProg);
|
||||
|
||||
m_lowerSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg,additionalMacros );
|
||||
btAssert(m_lowerSortDataKernel );
|
||||
b3Assert(m_lowerSortDataKernel );
|
||||
|
||||
m_upperSortDataKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg,additionalMacros );
|
||||
btAssert(m_upperSortDataKernel);
|
||||
b3Assert(m_upperSortDataKernel);
|
||||
|
||||
m_subtractKernel = 0;
|
||||
|
||||
if( maxSize )
|
||||
{
|
||||
m_subtractKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg,additionalMacros );
|
||||
btAssert(m_subtractKernel);
|
||||
b3Assert(m_subtractKernel);
|
||||
}
|
||||
|
||||
//m_constBuffer = new btOpenCLArray<btInt4>( device, 1, BufferBase::BUFFER_CONST );
|
||||
//m_constBuffer = new b3OpenCLArray<b3Int4>( device, 1, BufferBase::BUFFER_CONST );
|
||||
|
||||
m_lower = (maxSize == 0)? 0: new btOpenCLArray<unsigned int>(ctx,queue,maxSize );
|
||||
m_upper = (maxSize == 0)? 0: new btOpenCLArray<unsigned int>(ctx,queue, maxSize );
|
||||
m_lower = (maxSize == 0)? 0: new b3OpenCLArray<unsigned int>(ctx,queue,maxSize );
|
||||
m_upper = (maxSize == 0)? 0: new b3OpenCLArray<unsigned int>(ctx,queue, maxSize );
|
||||
|
||||
m_filler = new btFillCL(ctx,device,queue);
|
||||
m_filler = new b3FillCL(ctx,device,queue);
|
||||
}
|
||||
|
||||
btBoundSearchCL::~btBoundSearchCL()
|
||||
b3BoundSearchCL::~b3BoundSearchCL()
|
||||
{
|
||||
|
||||
delete m_lower;
|
||||
@@ -77,18 +77,18 @@ btBoundSearchCL::~btBoundSearchCL()
|
||||
}
|
||||
|
||||
|
||||
void btBoundSearchCL::execute(btOpenCLArray<btSortData>& src, int nSrc, btOpenCLArray<unsigned int>& dst, int nDst, Option option )
|
||||
void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option )
|
||||
{
|
||||
btInt4 constBuffer;
|
||||
b3Int4 constBuffer;
|
||||
constBuffer.x = nSrc;
|
||||
constBuffer.y = nDst;
|
||||
|
||||
if( option == BOUND_LOWER )
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src.getBufferCL(), true ), btBufferInfoCL( dst.getBufferCL()) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL()) };
|
||||
|
||||
btLauncherCL launcher( m_queue, m_lowerSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_queue, m_lowerSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nSrc );
|
||||
launcher.setConst( nDst );
|
||||
|
||||
@@ -96,10 +96,10 @@ void btBoundSearchCL::execute(btOpenCLArray<btSortData>& src, int nSrc, btOpenCL
|
||||
}
|
||||
else if( option == BOUND_UPPER )
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src.getBufferCL(), true ), btBufferInfoCL( dst.getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher(m_queue, m_upperSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_upperSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nSrc );
|
||||
launcher.setConst( nDst );
|
||||
|
||||
@@ -107,10 +107,10 @@ void btBoundSearchCL::execute(btOpenCLArray<btSortData>& src, int nSrc, btOpenCL
|
||||
}
|
||||
else if( option == COUNT )
|
||||
{
|
||||
btAssert( m_lower );
|
||||
btAssert( m_upper );
|
||||
btAssert( m_lower->capacity() <= (int)nDst );
|
||||
btAssert( m_upper->capacity() <= (int)nDst );
|
||||
b3Assert( m_lower );
|
||||
b3Assert( m_upper );
|
||||
b3Assert( m_lower->capacity() <= (int)nDst );
|
||||
b3Assert( m_upper->capacity() <= (int)nDst );
|
||||
|
||||
int zero = 0;
|
||||
m_filler->execute( *m_lower, zero, nDst );
|
||||
@@ -120,10 +120,10 @@ void btBoundSearchCL::execute(btOpenCLArray<btSortData>& src, int nSrc, btOpenCL
|
||||
execute( src, nSrc, *m_upper, nDst, BOUND_UPPER );
|
||||
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_upper->getBufferCL(), true ), btBufferInfoCL( m_lower->getBufferCL(), true ), btBufferInfoCL( dst.getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_upper->getBufferCL(), true ), b3BufferInfoCL( m_lower->getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher( m_queue, m_subtractKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_queue, m_subtractKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nSrc );
|
||||
launcher.setConst( nDst );
|
||||
|
||||
@@ -132,21 +132,21 @@ void btBoundSearchCL::execute(btOpenCLArray<btSortData>& src, int nSrc, btOpenCL
|
||||
}
|
||||
else
|
||||
{
|
||||
btAssert( 0 );
|
||||
b3Assert( 0 );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void btBoundSearchCL::executeHost( b3AlignedObjectArray<btSortData>& src, int nSrc,
|
||||
void b3BoundSearchCL::executeHost( b3AlignedObjectArray<b3SortData>& src, int nSrc,
|
||||
b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option )
|
||||
{
|
||||
|
||||
|
||||
for(int i=0; i<nSrc-1; i++)
|
||||
btAssert( src[i].m_key <= src[i+1].m_key );
|
||||
b3Assert( src[i].m_key <= src[i+1].m_key );
|
||||
|
||||
btSortData minData,zeroData,maxData;
|
||||
b3SortData minData,zeroData,maxData;
|
||||
minData.m_key = -1;
|
||||
minData.m_value = -1;
|
||||
zeroData.m_key=0;
|
||||
@@ -158,8 +158,8 @@ void btBoundSearchCL::executeHost( b3AlignedObjectArray<btSortData>& src, int nS
|
||||
{
|
||||
for(int i=0; i<nSrc; i++)
|
||||
{
|
||||
btSortData& iData = (i==0)? minData: src[i-1];
|
||||
btSortData& jData = (i==nSrc)? maxData: src[i];
|
||||
b3SortData& iData = (i==0)? minData: src[i-1];
|
||||
b3SortData& jData = (i==nSrc)? maxData: src[i];
|
||||
|
||||
if( iData.m_key != jData.m_key )
|
||||
{
|
||||
@@ -174,8 +174,8 @@ void btBoundSearchCL::executeHost( b3AlignedObjectArray<btSortData>& src, int nS
|
||||
{
|
||||
for(int i=1; i<nSrc+1; i++)
|
||||
{
|
||||
btSortData& iData = src[i-1];
|
||||
btSortData& jData = (i==nSrc)? maxData: src[i];
|
||||
b3SortData& iData = src[i-1];
|
||||
b3SortData& jData = (i==nSrc)? maxData: src[i];
|
||||
|
||||
if( iData.m_key != jData.m_key )
|
||||
{
|
||||
@@ -208,6 +208,6 @@ void btBoundSearchCL::executeHost( b3AlignedObjectArray<btSortData>& src, int nS
|
||||
}
|
||||
else
|
||||
{
|
||||
btAssert( 0 );
|
||||
b3Assert( 0 );
|
||||
}
|
||||
}
|
||||
@@ -13,8 +13,8 @@ subject to the following restrictions:
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
#ifndef BT_BOUNDSEARCH_H
|
||||
#define BT_BOUNDSEARCH_H
|
||||
#ifndef B3_BOUNDSEARCH_H
|
||||
#define B3_BOUNDSEARCH_H
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -24,10 +24,10 @@ subject to the following restrictions:
|
||||
#include <AdlPrimitives/Fill/Fill.h>
|
||||
*/
|
||||
|
||||
#include "btOpenCLArray.h"
|
||||
#include "btFillCL.h"
|
||||
#include "btRadixSort32CL.h" //for btSortData (perhaps move it?)
|
||||
class btBoundSearchCL
|
||||
#include "b3OpenCLArray.h"
|
||||
#include "b3FillCL.h"
|
||||
#include "b3RadixSort32CL.h" //for b3SortData (perhaps move it?)
|
||||
class b3BoundSearchCL
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -47,21 +47,21 @@ class btBoundSearchCL
|
||||
cl_kernel m_upperSortDataKernel;
|
||||
cl_kernel m_subtractKernel;
|
||||
|
||||
btOpenCLArray<btInt4>* m_constbtOpenCLArray;
|
||||
btOpenCLArray<unsigned int>* m_lower;
|
||||
btOpenCLArray<unsigned int>* m_upper;
|
||||
b3OpenCLArray<b3Int4>* m_constbtOpenCLArray;
|
||||
b3OpenCLArray<unsigned int>* m_lower;
|
||||
b3OpenCLArray<unsigned int>* m_upper;
|
||||
|
||||
btFillCL* m_filler;
|
||||
b3FillCL* m_filler;
|
||||
|
||||
btBoundSearchCL(cl_context context, cl_device_id device, cl_command_queue queue, int size);
|
||||
b3BoundSearchCL(cl_context context, cl_device_id device, cl_command_queue queue, int size);
|
||||
|
||||
virtual ~btBoundSearchCL();
|
||||
virtual ~b3BoundSearchCL();
|
||||
|
||||
// src has to be src[i].m_key <= src[i+1].m_key
|
||||
void execute( btOpenCLArray<btSortData>& src, int nSrc, btOpenCLArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER );
|
||||
void execute( b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER );
|
||||
|
||||
void executeHost( b3AlignedObjectArray<btSortData>& src, int nSrc, b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER);
|
||||
void executeHost( b3AlignedObjectArray<b3SortData>& src, int nSrc, b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER);
|
||||
};
|
||||
|
||||
|
||||
#endif //BT_BOUNDSEARCH_H
|
||||
#endif //B3_BOUNDSEARCH_H
|
||||
19
opencl/parallel_primitives/host/b3BufferInfoCL.h
Normal file
19
opencl/parallel_primitives/host/b3BufferInfoCL.h
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
#ifndef B3_BUFFER_INFO_CL_H
|
||||
#define B3_BUFFER_INFO_CL_H
|
||||
|
||||
#include "b3OpenCLArray.h"
|
||||
|
||||
|
||||
struct b3BufferInfoCL
|
||||
{
|
||||
//b3BufferInfoCL(){}
|
||||
|
||||
// template<typename T>
|
||||
b3BufferInfoCL(cl_mem buff, bool isReadOnly = false): m_clBuffer(buff), m_isReadOnly(isReadOnly){}
|
||||
|
||||
cl_mem m_clBuffer;
|
||||
bool m_isReadOnly;
|
||||
};
|
||||
|
||||
#endif //B3_BUFFER_INFO_CL_H
|
||||
@@ -1,13 +1,13 @@
|
||||
#include "btFillCL.h"
|
||||
#include "b3FillCL.h"
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "btBufferInfoCL.h"
|
||||
#include "btLauncherCL.h"
|
||||
#include "b3BufferInfoCL.h"
|
||||
#include "b3LauncherCL.h"
|
||||
|
||||
#define FILL_CL_PROGRAM_PATH "opencl/parallel_primitives/kernels/FillKernels.cl"
|
||||
|
||||
#include "../kernels/FillKernelsCL.h"
|
||||
|
||||
btFillCL::btFillCL(cl_context ctx, cl_device_id device, cl_command_queue queue)
|
||||
b3FillCL::b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue)
|
||||
:m_commandQueue(queue)
|
||||
{
|
||||
const char* kernelSource = fillKernelsCL;
|
||||
@@ -15,25 +15,25 @@ btFillCL::btFillCL(cl_context ctx, cl_device_id device, cl_command_queue queue)
|
||||
const char* additionalMacros = "";
|
||||
|
||||
cl_program fillProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, FILL_CL_PROGRAM_PATH);
|
||||
btAssert(fillProg);
|
||||
b3Assert(fillProg);
|
||||
|
||||
m_fillIntKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillIntKernel", &pErrNum, fillProg,additionalMacros );
|
||||
btAssert(m_fillIntKernel);
|
||||
b3Assert(m_fillIntKernel);
|
||||
|
||||
m_fillUnsignedIntKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillUnsignedIntKernel", &pErrNum, fillProg,additionalMacros );
|
||||
btAssert(m_fillIntKernel);
|
||||
b3Assert(m_fillIntKernel);
|
||||
|
||||
m_fillFloatKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillFloatKernel", &pErrNum, fillProg,additionalMacros );
|
||||
btAssert(m_fillFloatKernel);
|
||||
b3Assert(m_fillFloatKernel);
|
||||
|
||||
|
||||
|
||||
m_fillKernelInt2 = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillInt2Kernel", &pErrNum, fillProg,additionalMacros );
|
||||
btAssert(m_fillKernelInt2);
|
||||
b3Assert(m_fillKernelInt2);
|
||||
|
||||
}
|
||||
|
||||
btFillCL::~btFillCL()
|
||||
b3FillCL::~b3FillCL()
|
||||
{
|
||||
clReleaseKernel(m_fillKernelInt2);
|
||||
clReleaseKernel(m_fillIntKernel);
|
||||
@@ -42,12 +42,12 @@ btFillCL::~btFillCL()
|
||||
|
||||
}
|
||||
|
||||
void btFillCL::execute(btOpenCLArray<float>& src, const float value, int n, int offset)
|
||||
void b3FillCL::execute(b3OpenCLArray<float>& src, const float value, int n, int offset)
|
||||
{
|
||||
btAssert( n>0 );
|
||||
b3Assert( n>0 );
|
||||
|
||||
{
|
||||
btLauncherCL launcher( m_commandQueue, m_fillFloatKernel );
|
||||
b3LauncherCL launcher( m_commandQueue, m_fillFloatKernel );
|
||||
launcher.setBuffer( src.getBufferCL());
|
||||
launcher.setConst( n );
|
||||
launcher.setConst( value );
|
||||
@@ -57,13 +57,13 @@ void btFillCL::execute(btOpenCLArray<float>& src, const float value, int n, int
|
||||
}
|
||||
}
|
||||
|
||||
void btFillCL::execute(btOpenCLArray<int>& src, const int value, int n, int offset)
|
||||
void b3FillCL::execute(b3OpenCLArray<int>& src, const int value, int n, int offset)
|
||||
{
|
||||
btAssert( n>0 );
|
||||
b3Assert( n>0 );
|
||||
|
||||
|
||||
{
|
||||
btLauncherCL launcher( m_commandQueue, m_fillIntKernel );
|
||||
b3LauncherCL launcher( m_commandQueue, m_fillIntKernel );
|
||||
launcher.setBuffer(src.getBufferCL());
|
||||
launcher.setConst( n);
|
||||
launcher.setConst( value);
|
||||
@@ -73,15 +73,15 @@ void btFillCL::execute(btOpenCLArray<int>& src, const int value, int n, int offs
|
||||
}
|
||||
|
||||
|
||||
void btFillCL::execute(btOpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset)
|
||||
void b3FillCL::execute(b3OpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset)
|
||||
{
|
||||
btAssert( n>0 );
|
||||
b3Assert( n>0 );
|
||||
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src.getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( n );
|
||||
launcher.setConst(value);
|
||||
launcher.setConst(offset);
|
||||
@@ -90,7 +90,7 @@ void btFillCL::execute(btOpenCLArray<unsigned int>& src, const unsigned int valu
|
||||
}
|
||||
}
|
||||
|
||||
void btFillCL::executeHost(b3AlignedObjectArray<btInt2> &src, const btInt2 &value, int n, int offset)
|
||||
void b3FillCL::executeHost(b3AlignedObjectArray<b3Int2> &src, const b3Int2 &value, int n, int offset)
|
||||
{
|
||||
for (int i=0;i<n;i++)
|
||||
{
|
||||
@@ -98,7 +98,7 @@ void btFillCL::executeHost(b3AlignedObjectArray<btInt2> &src, const btInt2 &valu
|
||||
}
|
||||
}
|
||||
|
||||
void btFillCL::executeHost(b3AlignedObjectArray<int> &src, const int value, int n, int offset)
|
||||
void b3FillCL::executeHost(b3AlignedObjectArray<int> &src, const int value, int n, int offset)
|
||||
{
|
||||
for (int i=0;i<n;i++)
|
||||
{
|
||||
@@ -106,16 +106,16 @@ void btFillCL::executeHost(b3AlignedObjectArray<int> &src, const int value, int
|
||||
}
|
||||
}
|
||||
|
||||
void btFillCL::execute(btOpenCLArray<btInt2> &src, const btInt2 &value, int n, int offset)
|
||||
void b3FillCL::execute(b3OpenCLArray<b3Int2> &src, const b3Int2 &value, int n, int offset)
|
||||
{
|
||||
btAssert( n>0 );
|
||||
b3Assert( n>0 );
|
||||
|
||||
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src.getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher(m_commandQueue, m_fillKernelInt2);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(n);
|
||||
launcher.setConst(value);
|
||||
launcher.setConst(offset);
|
||||
63
opencl/parallel_primitives/host/b3FillCL.h
Normal file
63
opencl/parallel_primitives/host/b3FillCL.h
Normal file
@@ -0,0 +1,63 @@
|
||||
#ifndef B3_FILL_CL_H
|
||||
#define B3_FILL_CL_H
|
||||
|
||||
#include "b3OpenCLArray.h"
|
||||
#include "Bullet3Common/b3Scalar.h"
|
||||
|
||||
#include "b3Int2.h"
|
||||
#include "b3Int4.h"
|
||||
|
||||
|
||||
class b3FillCL
|
||||
{
|
||||
|
||||
cl_command_queue m_commandQueue;
|
||||
|
||||
cl_kernel m_fillKernelInt2;
|
||||
cl_kernel m_fillIntKernel;
|
||||
cl_kernel m_fillUnsignedIntKernel;
|
||||
cl_kernel m_fillFloatKernel;
|
||||
|
||||
public:
|
||||
|
||||
struct b3ConstData
|
||||
{
|
||||
union
|
||||
{
|
||||
b3Int4 m_data;
|
||||
b3UnsignedInt4 m_UnsignedData;
|
||||
};
|
||||
int m_offset;
|
||||
int m_n;
|
||||
int m_padding[2];
|
||||
};
|
||||
|
||||
protected:
|
||||
|
||||
public:
|
||||
|
||||
b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue);
|
||||
|
||||
virtual ~b3FillCL();
|
||||
|
||||
void execute(b3OpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset = 0);
|
||||
|
||||
void execute(b3OpenCLArray<int>& src, const int value, int n, int offset = 0);
|
||||
|
||||
void execute(b3OpenCLArray<float>& src, const float value, int n, int offset = 0);
|
||||
|
||||
void execute(b3OpenCLArray<b3Int2>& src, const b3Int2& value, int n, int offset = 0);
|
||||
|
||||
void executeHost(b3AlignedObjectArray<b3Int2> &src, const b3Int2 &value, int n, int offset);
|
||||
|
||||
void executeHost(b3AlignedObjectArray<int> &src, const int value, int n, int offset);
|
||||
|
||||
// void execute(b3OpenCLArray<b3Int4>& src, const b3Int4& value, int n, int offset = 0);
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif //B3_FILL_CL_H
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef BT_INT2_H
|
||||
#define BT_INT2_H
|
||||
#ifndef B3_INT2_H
|
||||
#define B3_INT2_H
|
||||
|
||||
struct btUnsignedInt2
|
||||
struct b3UnsignedInt2
|
||||
{
|
||||
union
|
||||
{
|
||||
@@ -16,7 +16,7 @@ struct btUnsignedInt2
|
||||
};
|
||||
};
|
||||
|
||||
struct btInt2
|
||||
struct b3Int2
|
||||
{
|
||||
union
|
||||
{
|
||||
@@ -1,11 +1,11 @@
|
||||
#ifndef BT_INT4_H
|
||||
#define BT_INT4_H
|
||||
#ifndef B3_INT4_H
|
||||
#define B3_INT4_H
|
||||
|
||||
#include "Bullet3Common/b3Scalar.h"
|
||||
|
||||
ATTRIBUTE_ALIGNED16(struct) btUnsignedInt4
|
||||
ATTRIBUTE_ALIGNED16(struct) b3UnsignedInt4
|
||||
{
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
union
|
||||
{
|
||||
@@ -20,9 +20,9 @@ ATTRIBUTE_ALIGNED16(struct) btUnsignedInt4
|
||||
};
|
||||
};
|
||||
|
||||
ATTRIBUTE_ALIGNED16(struct) btInt4
|
||||
ATTRIBUTE_ALIGNED16(struct) b3Int4
|
||||
{
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
union
|
||||
{
|
||||
@@ -37,19 +37,19 @@ ATTRIBUTE_ALIGNED16(struct) btInt4
|
||||
};
|
||||
};
|
||||
|
||||
SIMD_FORCE_INLINE btInt4 btMakeInt4(int x, int y, int z, int w = 0)
|
||||
SIMD_FORCE_INLINE b3Int4 b3MakeInt4(int x, int y, int z, int w = 0)
|
||||
{
|
||||
btInt4 v;
|
||||
b3Int4 v;
|
||||
v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w;
|
||||
return v;
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE btUnsignedInt4 btMakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0)
|
||||
SIMD_FORCE_INLINE b3UnsignedInt4 b3MakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0)
|
||||
{
|
||||
btUnsignedInt4 v;
|
||||
b3UnsignedInt4 v;
|
||||
v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w;
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
#endif //BT_INT4_H
|
||||
#endif //B3_INT4_H
|
||||
@@ -1,17 +1,17 @@
|
||||
|
||||
#ifndef BT_LAUNCHER_CL_H
|
||||
#define BT_LAUNCHER_CL_H
|
||||
#ifndef B3_LAUNCHER_CL_H
|
||||
#define B3_LAUNCHER_CL_H
|
||||
|
||||
#include "btBufferInfoCL.h"
|
||||
#include "b3BufferInfoCL.h"
|
||||
#include "Bullet3Common/b3MinMax.h"
|
||||
#include "btOpenCLArray.h"
|
||||
#include "b3OpenCLArray.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#pragma warning(disable :4996)
|
||||
#endif
|
||||
#define BT_CL_MAX_ARG_SIZE 16
|
||||
struct btKernelArgData
|
||||
#define B3_CL_MAX_ARG_SIZE 16
|
||||
struct b3KernelArgData
|
||||
{
|
||||
int m_isBuffer;
|
||||
int m_argIndex;
|
||||
@@ -19,28 +19,28 @@ struct btKernelArgData
|
||||
union
|
||||
{
|
||||
cl_mem m_clBuffer;
|
||||
unsigned char m_argData[BT_CL_MAX_ARG_SIZE];
|
||||
unsigned char m_argData[B3_CL_MAX_ARG_SIZE];
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
class btLauncherCL
|
||||
class b3LauncherCL
|
||||
{
|
||||
|
||||
cl_command_queue m_commandQueue;
|
||||
cl_kernel m_kernel;
|
||||
int m_idx;
|
||||
|
||||
b3AlignedObjectArray<btKernelArgData> m_kernelArguments;
|
||||
b3AlignedObjectArray<b3KernelArgData> m_kernelArguments;
|
||||
|
||||
|
||||
int m_serializationSizeInBytes;
|
||||
|
||||
public:
|
||||
|
||||
b3AlignedObjectArray<btOpenCLArray<unsigned char>* > m_arrays;
|
||||
b3AlignedObjectArray<b3OpenCLArray<unsigned char>* > m_arrays;
|
||||
|
||||
btLauncherCL(cl_command_queue queue, cl_kernel kernel)
|
||||
b3LauncherCL(cl_command_queue queue, cl_kernel kernel)
|
||||
:m_commandQueue(queue),
|
||||
m_kernel(kernel),
|
||||
m_idx(0)
|
||||
@@ -48,7 +48,7 @@ class btLauncherCL
|
||||
m_serializationSizeInBytes = sizeof(int);
|
||||
}
|
||||
|
||||
virtual ~btLauncherCL()
|
||||
virtual ~b3LauncherCL()
|
||||
{
|
||||
for (int i=0;i<m_arrays.size();i++)
|
||||
{
|
||||
@@ -59,7 +59,7 @@ class btLauncherCL
|
||||
inline void setBuffer( cl_mem clBuffer)
|
||||
{
|
||||
|
||||
btKernelArgData kernelArg;
|
||||
b3KernelArgData kernelArg;
|
||||
kernelArg.m_argIndex = m_idx;
|
||||
kernelArg.m_isBuffer = 1;
|
||||
kernelArg.m_clBuffer = clBuffer;
|
||||
@@ -75,23 +75,23 @@ class btLauncherCL
|
||||
&param_value,
|
||||
&actualSizeInBytes);
|
||||
|
||||
btAssert( err == CL_SUCCESS );
|
||||
b3Assert( err == CL_SUCCESS );
|
||||
kernelArg.m_argSizeInBytes = param_value;
|
||||
|
||||
m_kernelArguments.push_back(kernelArg);
|
||||
m_serializationSizeInBytes+= sizeof(btKernelArgData);
|
||||
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
|
||||
m_serializationSizeInBytes+=param_value;
|
||||
|
||||
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &clBuffer);
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
|
||||
inline void setBuffers( btBufferInfoCL* buffInfo, int n )
|
||||
inline void setBuffers( b3BufferInfoCL* buffInfo, int n )
|
||||
{
|
||||
for(int i=0; i<n; i++)
|
||||
{
|
||||
btKernelArgData kernelArg;
|
||||
b3KernelArgData kernelArg;
|
||||
kernelArg.m_argIndex = m_idx;
|
||||
kernelArg.m_isBuffer = 1;
|
||||
kernelArg.m_clBuffer = buffInfo[i].m_clBuffer;
|
||||
@@ -107,15 +107,15 @@ class btLauncherCL
|
||||
&param_value,
|
||||
&actualSizeInBytes);
|
||||
|
||||
btAssert( err == CL_SUCCESS );
|
||||
b3Assert( err == CL_SUCCESS );
|
||||
kernelArg.m_argSizeInBytes = param_value;
|
||||
|
||||
m_kernelArguments.push_back(kernelArg);
|
||||
m_serializationSizeInBytes+= sizeof(btKernelArgData);
|
||||
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
|
||||
m_serializationSizeInBytes+=param_value;
|
||||
|
||||
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &buffInfo[i].m_clBuffer);
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -133,12 +133,12 @@ class btLauncherCL
|
||||
|
||||
for (int i=0;i<numArguments;i++)
|
||||
{
|
||||
btKernelArgData* arg = (btKernelArgData*)&buf[index];
|
||||
b3KernelArgData* arg = (b3KernelArgData*)&buf[index];
|
||||
|
||||
index+=sizeof(btKernelArgData);
|
||||
index+=sizeof(b3KernelArgData);
|
||||
if (arg->m_isBuffer)
|
||||
{
|
||||
btOpenCLArray<unsigned char>* clData = new btOpenCLArray<unsigned char>(ctx,m_commandQueue, arg->m_argSizeInBytes);
|
||||
b3OpenCLArray<unsigned char>* clData = new b3OpenCLArray<unsigned char>(ctx,m_commandQueue, arg->m_argSizeInBytes);
|
||||
clData->resize(arg->m_argSizeInBytes);
|
||||
|
||||
clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes);
|
||||
@@ -148,12 +148,12 @@ class btLauncherCL
|
||||
m_arrays.push_back(clData);
|
||||
|
||||
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer);
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
index+=arg->m_argSizeInBytes;
|
||||
} else
|
||||
{
|
||||
cl_int status = clSetKernelArg( m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData);
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
}
|
||||
m_kernelArguments.push_back(*arg);
|
||||
}
|
||||
@@ -176,7 +176,7 @@ class btLauncherCL
|
||||
|
||||
for (int ii=0;ii<numArguments;ii++)
|
||||
{
|
||||
btKernelArgData* argGold = (btKernelArgData*)&goldBuffer[index];
|
||||
b3KernelArgData* argGold = (b3KernelArgData*)&goldBuffer[index];
|
||||
|
||||
if (m_kernelArguments[ii].m_argSizeInBytes != argGold->m_argSizeInBytes)
|
||||
{
|
||||
@@ -194,7 +194,7 @@ class btLauncherCL
|
||||
return -3;
|
||||
}
|
||||
}
|
||||
index+=sizeof(btKernelArgData);
|
||||
index+=sizeof(b3KernelArgData);
|
||||
|
||||
if (argGold->m_isBuffer)
|
||||
{
|
||||
@@ -209,7 +209,7 @@ class btLauncherCL
|
||||
cl_int status = 0;
|
||||
status = clEnqueueReadBuffer( m_commandQueue, m_kernelArguments[ii].m_clBuffer, CL_TRUE, 0, m_kernelArguments[ii].m_argSizeInBytes,
|
||||
memBuf, 0,0,0 );
|
||||
btAssert( status==CL_SUCCESS );
|
||||
b3Assert( status==CL_SUCCESS );
|
||||
clFinish(m_commandQueue);
|
||||
|
||||
for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++)
|
||||
@@ -256,7 +256,7 @@ class btLauncherCL
|
||||
|
||||
assert(destBufferCapacity>=m_serializationSizeInBytes);
|
||||
|
||||
//todo: use the btSerializer for this to allow for 32/64bit, endianness etc
|
||||
//todo: use the b3Serializer for this to allow for 32/64bit, endianness etc
|
||||
int numArguments = m_kernelArguments.size();
|
||||
int curBufferSize = 0;
|
||||
int* dest = (int*)&destBuffer[curBufferSize];
|
||||
@@ -267,16 +267,16 @@ class btLauncherCL
|
||||
|
||||
for (int i=0;i<this->m_kernelArguments.size();i++)
|
||||
{
|
||||
btKernelArgData* arg = (btKernelArgData*) &destBuffer[curBufferSize];
|
||||
b3KernelArgData* arg = (b3KernelArgData*) &destBuffer[curBufferSize];
|
||||
*arg = m_kernelArguments[i];
|
||||
curBufferSize+=sizeof(btKernelArgData);
|
||||
curBufferSize+=sizeof(b3KernelArgData);
|
||||
if (arg->m_isBuffer==1)
|
||||
{
|
||||
//copy the OpenCL buffer content
|
||||
cl_int status = 0;
|
||||
status = clEnqueueReadBuffer( m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes,
|
||||
&destBuffer[curBufferSize], 0,0,0 );
|
||||
btAssert( status==CL_SUCCESS );
|
||||
b3Assert( status==CL_SUCCESS );
|
||||
clFinish(m_commandQueue);
|
||||
curBufferSize+=arg->m_argSizeInBytes;
|
||||
}
|
||||
@@ -317,18 +317,18 @@ class btLauncherCL
|
||||
inline void setConst( const T& consts )
|
||||
{
|
||||
int sz=sizeof(T);
|
||||
btAssert(sz<=BT_CL_MAX_ARG_SIZE);
|
||||
btKernelArgData kernelArg;
|
||||
b3Assert(sz<=B3_CL_MAX_ARG_SIZE);
|
||||
b3KernelArgData kernelArg;
|
||||
kernelArg.m_argIndex = m_idx;
|
||||
kernelArg.m_isBuffer = 0;
|
||||
T* destArg = (T*)kernelArg.m_argData;
|
||||
*destArg = consts;
|
||||
kernelArg.m_argSizeInBytes = sizeof(T);
|
||||
m_kernelArguments.push_back(kernelArg);
|
||||
m_serializationSizeInBytes+=sizeof(btKernelArgData);
|
||||
m_serializationSizeInBytes+=sizeof(b3KernelArgData);
|
||||
|
||||
cl_int status = clSetKernelArg( m_kernel, m_idx++, sz, &consts );
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
inline void launch1D( int numThreads, int localSize = 64)
|
||||
@@ -342,9 +342,9 @@ class btLauncherCL
|
||||
size_t lRange[3] = {1,1,1};
|
||||
lRange[0] = localSizeX;
|
||||
lRange[1] = localSizeY;
|
||||
gRange[0] = btMax((size_t)1, (numThreadsX/lRange[0])+(!(numThreadsX%lRange[0])?0:1));
|
||||
gRange[0] = b3Max((size_t)1, (numThreadsX/lRange[0])+(!(numThreadsX%lRange[0])?0:1));
|
||||
gRange[0] *= lRange[0];
|
||||
gRange[1] = btMax((size_t)1, (numThreadsY/lRange[1])+(!(numThreadsY%lRange[1])?0:1));
|
||||
gRange[1] = b3Max((size_t)1, (numThreadsY/lRange[1])+(!(numThreadsY%lRange[1])?0:1));
|
||||
gRange[1] *= lRange[1];
|
||||
|
||||
cl_int status = clEnqueueNDRangeKernel( m_commandQueue,
|
||||
@@ -353,11 +353,11 @@ class btLauncherCL
|
||||
{
|
||||
printf("Error: OpenCL status = %d\n",status);
|
||||
}
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif //BT_LAUNCHER_CL_H
|
||||
#endif //B3_LAUNCHER_CL_H
|
||||
@@ -1,11 +1,11 @@
|
||||
#ifndef BT_OPENCL_ARRAY_H
|
||||
#define BT_OPENCL_ARRAY_H
|
||||
#ifndef B3_OPENCL_ARRAY_H
|
||||
#define B3_OPENCL_ARRAY_H
|
||||
|
||||
#include "Bullet3Common/b3AlignedObjectArray.h"
|
||||
#include "../../basic_initialize/b3OpenCLInclude.h"
|
||||
|
||||
template <typename T>
|
||||
class btOpenCLArray
|
||||
class b3OpenCLArray
|
||||
{
|
||||
int m_size;
|
||||
int m_capacity;
|
||||
@@ -28,7 +28,7 @@ class btOpenCLArray
|
||||
m_capacity=0;
|
||||
}
|
||||
|
||||
btOpenCLArray<T>& operator=(const btOpenCLArray<T>& src);
|
||||
b3OpenCLArray<T>& operator=(const b3OpenCLArray<T>& src);
|
||||
|
||||
SIMD_FORCE_INLINE int allocSize(int size)
|
||||
{
|
||||
@@ -37,7 +37,7 @@ class btOpenCLArray
|
||||
|
||||
public:
|
||||
|
||||
btOpenCLArray(cl_context ctx, cl_command_queue queue, int initialCapacity=0, bool allowGrowingCapacity=true)
|
||||
b3OpenCLArray(cl_context ctx, cl_command_queue queue, int initialCapacity=0, bool allowGrowingCapacity=true)
|
||||
:m_size(0), m_capacity(0),m_clBuffer(0),
|
||||
m_clContext(ctx),m_commandQueue(queue),
|
||||
m_ownsMemory(true),m_allowGrowingCapacity(true)
|
||||
@@ -61,7 +61,7 @@ public:
|
||||
}
|
||||
|
||||
// we could enable this assignment, but need to make sure to avoid accidental deep copies
|
||||
// btOpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src)
|
||||
// b3OpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src)
|
||||
// {
|
||||
// copyFromArray(src);
|
||||
// return *this;
|
||||
@@ -74,7 +74,7 @@ public:
|
||||
}
|
||||
|
||||
|
||||
virtual ~btOpenCLArray()
|
||||
virtual ~b3OpenCLArray()
|
||||
{
|
||||
deallocate();
|
||||
m_size=0;
|
||||
@@ -94,8 +94,8 @@ public:
|
||||
|
||||
SIMD_FORCE_INLINE T forcedAt(int n) const
|
||||
{
|
||||
btAssert(n>=0);
|
||||
btAssert(n<capacity());
|
||||
b3Assert(n>=0);
|
||||
b3Assert(n<capacity());
|
||||
T elem;
|
||||
copyToHostPointer(&elem,1,n,true);
|
||||
return elem;
|
||||
@@ -103,8 +103,8 @@ public:
|
||||
|
||||
SIMD_FORCE_INLINE T at(int n) const
|
||||
{
|
||||
btAssert(n>=0);
|
||||
btAssert(n<size());
|
||||
b3Assert(n>=0);
|
||||
b3Assert(n<size());
|
||||
T elem;
|
||||
copyToHostPointer(&elem,1,n,true);
|
||||
return elem;
|
||||
@@ -152,18 +152,18 @@ public:
|
||||
//create a new OpenCL buffer
|
||||
int memSizeInBytes = sizeof(T)*_Count;
|
||||
cl_mem buf = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE, memSizeInBytes, NULL, &ciErrNum);
|
||||
btAssert(ciErrNum==CL_SUCCESS);
|
||||
b3Assert(ciErrNum==CL_SUCCESS);
|
||||
|
||||
//#define BT_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
#ifdef BT_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
//#define B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
#ifdef B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
unsigned char* src = (unsigned char*)malloc(memSizeInBytes);
|
||||
for (int i=0;i<memSizeInBytes;i++)
|
||||
src[i] = 0xbb;
|
||||
ciErrNum = clEnqueueWriteBuffer( m_commandQueue, buf, CL_TRUE, 0, memSizeInBytes, src, 0,0,0 );
|
||||
btAssert(ciErrNum==CL_SUCCESS);
|
||||
b3Assert(ciErrNum==CL_SUCCESS);
|
||||
clFinish(m_commandQueue);
|
||||
free(src);
|
||||
#endif //BT_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
#endif //B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
|
||||
if (copyOldContents)
|
||||
copyToCL(buf, size());
|
||||
@@ -177,7 +177,7 @@ public:
|
||||
} else
|
||||
{
|
||||
//fail: assert and
|
||||
btAssert(0);
|
||||
b3Assert(0);
|
||||
deallocate();
|
||||
}
|
||||
}
|
||||
@@ -189,19 +189,19 @@ public:
|
||||
if (numElements<=0)
|
||||
return;
|
||||
|
||||
btAssert(m_clBuffer);
|
||||
btAssert(destination);
|
||||
b3Assert(m_clBuffer);
|
||||
b3Assert(destination);
|
||||
|
||||
//likely some error, destination is same as source
|
||||
btAssert(m_clBuffer != destination);
|
||||
b3Assert(m_clBuffer != destination);
|
||||
|
||||
btAssert((firstElem+numElements)<=m_size);
|
||||
b3Assert((firstElem+numElements)<=m_size);
|
||||
|
||||
cl_int status = 0;
|
||||
|
||||
|
||||
btAssert(numElements>0);
|
||||
btAssert(numElements<=m_size);
|
||||
b3Assert(numElements>0);
|
||||
b3Assert(numElements<=m_size);
|
||||
|
||||
int srcOffsetBytes = sizeof(T)*firstElem;
|
||||
int dstOffsetInBytes = sizeof(T)*dstOffsetInElems;
|
||||
@@ -209,7 +209,7 @@ public:
|
||||
status = clEnqueueCopyBuffer( m_commandQueue, m_clBuffer, destination,
|
||||
srcOffsetBytes, dstOffsetInBytes, sizeof(T)*numElements, 0, 0, 0 );
|
||||
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
void copyFromHost(const b3AlignedObjectArray<T>& srcArray, bool waitForCompletion=true)
|
||||
@@ -225,13 +225,13 @@ public:
|
||||
|
||||
void copyFromHostPointer(const T* src, int numElems, int destFirstElem= 0, bool waitForCompletion=true)
|
||||
{
|
||||
btAssert(numElems+destFirstElem <= capacity());
|
||||
b3Assert(numElems+destFirstElem <= capacity());
|
||||
|
||||
cl_int status = 0;
|
||||
int sizeInBytes=sizeof(T)*numElems;
|
||||
status = clEnqueueWriteBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*destFirstElem, sizeInBytes,
|
||||
src, 0,0,0 );
|
||||
btAssert(status == CL_SUCCESS );
|
||||
b3Assert(status == CL_SUCCESS );
|
||||
if (waitForCompletion)
|
||||
clFinish(m_commandQueue);
|
||||
|
||||
@@ -247,18 +247,18 @@ public:
|
||||
|
||||
void copyToHostPointer(T* destPtr, int numElem, int srcFirstElem=0, bool waitForCompletion=true) const
|
||||
{
|
||||
btAssert(numElem+srcFirstElem <= capacity());
|
||||
b3Assert(numElem+srcFirstElem <= capacity());
|
||||
|
||||
cl_int status = 0;
|
||||
status = clEnqueueReadBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*srcFirstElem, sizeof(T)*numElem,
|
||||
destPtr, 0,0,0 );
|
||||
btAssert( status==CL_SUCCESS );
|
||||
b3Assert( status==CL_SUCCESS );
|
||||
|
||||
if (waitForCompletion)
|
||||
clFinish(m_commandQueue);
|
||||
}
|
||||
|
||||
void copyFromOpenCLArray(const btOpenCLArray& src)
|
||||
void copyFromOpenCLArray(const b3OpenCLArray& src)
|
||||
{
|
||||
int newSize = src.size();
|
||||
resize(newSize);
|
||||
@@ -271,4 +271,4 @@ public:
|
||||
};
|
||||
|
||||
|
||||
#endif //BT_OPENCL_ARRAY_H
|
||||
#endif //B3_OPENCL_ARRAY_H
|
||||
@@ -1,32 +1,32 @@
|
||||
#include "btPrefixScanCL.h"
|
||||
#include "btFillCL.h"
|
||||
#define BT_PREFIXSCAN_PROG_PATH "opencl/parallel_primitives/kernels/PrefixScanKernels.cl"
|
||||
#include "b3PrefixScanCL.h"
|
||||
#include "b3FillCL.h"
|
||||
#define B3_PREFIXSCAN_PROG_PATH "opencl/parallel_primitives/kernels/PrefixScanKernels.cl"
|
||||
|
||||
#include "btLauncherCL.h"
|
||||
#include "b3LauncherCL.h"
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "../kernels/PrefixScanKernelsCL.h"
|
||||
|
||||
btPrefixScanCL::btPrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size)
|
||||
b3PrefixScanCL::b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size)
|
||||
:m_commandQueue(queue)
|
||||
{
|
||||
const char* scanKernelSource = prefixScanKernelsCL;
|
||||
cl_int pErrNum;
|
||||
char* additionalMacros=0;
|
||||
|
||||
m_workBuffer = new btOpenCLArray<unsigned int>(ctx,queue,size);
|
||||
cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, scanKernelSource, &pErrNum,additionalMacros, BT_PREFIXSCAN_PROG_PATH);
|
||||
btAssert(scanProg);
|
||||
m_workBuffer = new b3OpenCLArray<unsigned int>(ctx,queue,size);
|
||||
cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, scanKernelSource, &pErrNum,additionalMacros, B3_PREFIXSCAN_PROG_PATH);
|
||||
b3Assert(scanProg);
|
||||
|
||||
m_localScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg,additionalMacros );
|
||||
btAssert(m_localScanKernel );
|
||||
b3Assert(m_localScanKernel );
|
||||
m_blockSumKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg,additionalMacros );
|
||||
btAssert(m_blockSumKernel );
|
||||
b3Assert(m_blockSumKernel );
|
||||
m_propagationKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg,additionalMacros );
|
||||
btAssert(m_propagationKernel );
|
||||
b3Assert(m_propagationKernel );
|
||||
}
|
||||
|
||||
|
||||
btPrefixScanCL::~btPrefixScanCL()
|
||||
b3PrefixScanCL::~b3PrefixScanCL()
|
||||
{
|
||||
delete m_workBuffer;
|
||||
clReleaseKernel(m_localScanKernel);
|
||||
@@ -35,7 +35,7 @@ btPrefixScanCL::~btPrefixScanCL()
|
||||
}
|
||||
|
||||
template<class T>
|
||||
T btNextPowerOf2(T n)
|
||||
T b3NextPowerOf2(T n)
|
||||
{
|
||||
n -= 1;
|
||||
for(int i=0; i<sizeof(T)*8; i++)
|
||||
@@ -43,37 +43,37 @@ T btNextPowerOf2(T n)
|
||||
return n+1;
|
||||
}
|
||||
|
||||
void btPrefixScanCL::execute(btOpenCLArray<unsigned int>& src, btOpenCLArray<unsigned int>& dst, int n, unsigned int* sum)
|
||||
void b3PrefixScanCL::execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<unsigned int>& dst, int n, unsigned int* sum)
|
||||
{
|
||||
|
||||
// btAssert( data->m_option == EXCLUSIVE );
|
||||
// b3Assert( data->m_option == EXCLUSIVE );
|
||||
const unsigned int numBlocks = (const unsigned int)( (n+BLOCK_SIZE*2-1)/(BLOCK_SIZE*2) );
|
||||
|
||||
dst.resize(src.size());
|
||||
m_workBuffer->resize(src.size());
|
||||
|
||||
btInt4 constBuffer;
|
||||
b3Int4 constBuffer;
|
||||
constBuffer.x = n;
|
||||
constBuffer.y = numBlocks;
|
||||
constBuffer.z = (int)btNextPowerOf2( numBlocks );
|
||||
constBuffer.z = (int)b3NextPowerOf2( numBlocks );
|
||||
|
||||
btOpenCLArray<unsigned int>* srcNative = &src;
|
||||
btOpenCLArray<unsigned int>* dstNative = &dst;
|
||||
b3OpenCLArray<unsigned int>* srcNative = &src;
|
||||
b3OpenCLArray<unsigned int>* dstNative = &dst;
|
||||
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( dstNative->getBufferCL() ), btBufferInfoCL( srcNative->getBufferCL() ), btBufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher( m_commandQueue, m_localScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_commandQueue, m_localScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( constBuffer );
|
||||
launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE );
|
||||
}
|
||||
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher( m_commandQueue, m_blockSumKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_commandQueue, m_blockSumKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( constBuffer );
|
||||
launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE );
|
||||
}
|
||||
@@ -81,9 +81,9 @@ void btPrefixScanCL::execute(btOpenCLArray<unsigned int>& src, btOpenCLArray<uns
|
||||
|
||||
if( numBlocks > 1 )
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( dstNative->getBufferCL() ), btBufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
btLauncherCL launcher( m_commandQueue, m_propagationKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
b3LauncherCL launcher( m_commandQueue, m_propagationKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( constBuffer );
|
||||
launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE );
|
||||
}
|
||||
@@ -98,7 +98,7 @@ void btPrefixScanCL::execute(btOpenCLArray<unsigned int>& src, btOpenCLArray<uns
|
||||
}
|
||||
|
||||
|
||||
void btPrefixScanCL::executeHost(b3AlignedObjectArray<unsigned int>& src, b3AlignedObjectArray<unsigned int>& dst, int n, unsigned int* sum)
|
||||
void b3PrefixScanCL::executeHost(b3AlignedObjectArray<unsigned int>& src, b3AlignedObjectArray<unsigned int>& dst, int n, unsigned int* sum)
|
||||
{
|
||||
unsigned int s = 0;
|
||||
//if( data->m_option == EXCLUSIVE )
|
||||
@@ -1,12 +1,12 @@
|
||||
|
||||
#ifndef BT_PREFIX_SCAN_CL_H
|
||||
#define BT_PREFIX_SCAN_CL_H
|
||||
#ifndef B3_PREFIX_SCAN_CL_H
|
||||
#define B3_PREFIX_SCAN_CL_H
|
||||
|
||||
#include "btOpenCLArray.h"
|
||||
#include "btBufferInfoCL.h"
|
||||
#include "b3OpenCLArray.h"
|
||||
#include "b3BufferInfoCL.h"
|
||||
#include "Bullet3Common/b3AlignedObjectArray.h"
|
||||
|
||||
class btPrefixScanCL
|
||||
class b3PrefixScanCL
|
||||
{
|
||||
enum
|
||||
{
|
||||
@@ -21,17 +21,17 @@ class btPrefixScanCL
|
||||
cl_kernel m_blockSumKernel;
|
||||
cl_kernel m_propagationKernel;
|
||||
|
||||
btOpenCLArray<unsigned int>* m_workBuffer;
|
||||
b3OpenCLArray<unsigned int>* m_workBuffer;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
btPrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue,int size=0);
|
||||
b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue,int size=0);
|
||||
|
||||
virtual ~btPrefixScanCL();
|
||||
virtual ~b3PrefixScanCL();
|
||||
|
||||
void execute(btOpenCLArray<unsigned int>& src, btOpenCLArray<unsigned int>& dst, int n, unsigned int* sum = 0);
|
||||
void execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<unsigned int>& dst, int n, unsigned int* sum = 0);
|
||||
void executeHost(b3AlignedObjectArray<unsigned int>& src, b3AlignedObjectArray<unsigned int>& dst, int n, unsigned int* sum);
|
||||
};
|
||||
|
||||
#endif //BT_PREFIX_SCAN_CL_H
|
||||
#endif //B3_PREFIX_SCAN_CL_H
|
||||
@@ -1,27 +1,27 @@
|
||||
|
||||
#include "btRadixSort32CL.h"
|
||||
#include "btLauncherCL.h"
|
||||
#include "b3RadixSort32CL.h"
|
||||
#include "b3LauncherCL.h"
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "btPrefixScanCL.h"
|
||||
#include "btFillCL.h"
|
||||
#include "b3PrefixScanCL.h"
|
||||
#include "b3FillCL.h"
|
||||
|
||||
#define RADIXSORT32_PATH "opencl/parallel_primitives/kernels/RadixSort32Kernels.cl"
|
||||
|
||||
#include "../kernels/RadixSort32KernelsCL.h"
|
||||
|
||||
btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity)
|
||||
b3RadixSort32CL::b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity)
|
||||
:m_commandQueue(queue)
|
||||
{
|
||||
btOpenCLDeviceInfo info;
|
||||
b3OpenCLDeviceInfo info;
|
||||
b3OpenCLUtils::getDeviceInfo(device,&info);
|
||||
m_deviceCPU = (info.m_deviceType & CL_DEVICE_TYPE_CPU)!=0;
|
||||
|
||||
m_workBuffer1 = new btOpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer2 = new btOpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer3 = new btOpenCLArray<btSortData>(ctx,queue);
|
||||
m_workBuffer3a = new btOpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer4 = new btOpenCLArray<btSortData>(ctx,queue);
|
||||
m_workBuffer4a = new btOpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer1 = new b3OpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer2 = new b3OpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer3 = new b3OpenCLArray<b3SortData>(ctx,queue);
|
||||
m_workBuffer3a = new b3OpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer4 = new b3OpenCLArray<b3SortData>(ctx,queue);
|
||||
m_workBuffer4a = new b3OpenCLArray<unsigned int>(ctx,queue);
|
||||
|
||||
|
||||
if (initialCapacity>0)
|
||||
@@ -33,8 +33,8 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
|
||||
m_workBuffer4a->resize(initialCapacity);
|
||||
}
|
||||
|
||||
m_scan = new btPrefixScanCL(ctx,device,queue);
|
||||
m_fill = new btFillCL(ctx,device,queue);
|
||||
m_scan = new b3PrefixScanCL(ctx,device,queue);
|
||||
m_fill = new b3FillCL(ctx,device,queue);
|
||||
|
||||
const char* additionalMacros = "";
|
||||
const char* srcFileNameForCaching="";
|
||||
@@ -43,15 +43,15 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
|
||||
const char* kernelSource = radixSort32KernelsCL;
|
||||
|
||||
cl_program sortProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, RADIXSORT32_PATH);
|
||||
btAssert(sortProg);
|
||||
b3Assert(sortProg);
|
||||
|
||||
m_streamCountSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountSortDataKernel", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_streamCountSortDataKernel );
|
||||
b3Assert(m_streamCountSortDataKernel );
|
||||
|
||||
|
||||
|
||||
m_streamCountKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountKernel", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_streamCountKernel);
|
||||
b3Assert(m_streamCountKernel);
|
||||
|
||||
|
||||
|
||||
@@ -59,23 +59,23 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
|
||||
{
|
||||
|
||||
m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernelSerial", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_sortAndScatterSortDataKernel);
|
||||
b3Assert(m_sortAndScatterSortDataKernel);
|
||||
m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernelSerial", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_sortAndScatterKernel);
|
||||
b3Assert(m_sortAndScatterKernel);
|
||||
} else
|
||||
{
|
||||
m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernel", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_sortAndScatterSortDataKernel);
|
||||
b3Assert(m_sortAndScatterSortDataKernel);
|
||||
m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernel", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_sortAndScatterKernel);
|
||||
b3Assert(m_sortAndScatterKernel);
|
||||
}
|
||||
|
||||
m_prefixScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "PrefixScanKernel", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_prefixScanKernel);
|
||||
b3Assert(m_prefixScanKernel);
|
||||
|
||||
}
|
||||
|
||||
btRadixSort32CL::~btRadixSort32CL()
|
||||
b3RadixSort32CL::~b3RadixSort32CL()
|
||||
{
|
||||
delete m_scan;
|
||||
delete m_fill;
|
||||
@@ -93,7 +93,7 @@ btRadixSort32CL::~btRadixSort32CL()
|
||||
clReleaseKernel(m_prefixScanKernel);
|
||||
}
|
||||
|
||||
void btRadixSort32CL::executeHost(b3AlignedObjectArray<btSortData>& inout, int sortBits /* = 32 */)
|
||||
void b3RadixSort32CL::executeHost(b3AlignedObjectArray<b3SortData>& inout, int sortBits /* = 32 */)
|
||||
{
|
||||
int n = inout.size();
|
||||
const int BITS_PER_PASS = 8;
|
||||
@@ -103,10 +103,10 @@ void btRadixSort32CL::executeHost(b3AlignedObjectArray<btSortData>& inout, int s
|
||||
int tables[NUM_TABLES];
|
||||
int counter[NUM_TABLES];
|
||||
|
||||
btSortData* src = &inout[0];
|
||||
b3AlignedObjectArray<btSortData> workbuffer;
|
||||
b3SortData* src = &inout[0];
|
||||
b3AlignedObjectArray<b3SortData> workbuffer;
|
||||
workbuffer.resize(inout.size());
|
||||
btSortData* dst = &workbuffer[0];
|
||||
b3SortData* dst = &workbuffer[0];
|
||||
|
||||
int count=0;
|
||||
for(int startBit=0; startBit<sortBits; startBit+=BITS_PER_PASS)
|
||||
@@ -152,21 +152,21 @@ void btRadixSort32CL::executeHost(b3AlignedObjectArray<btSortData>& inout, int s
|
||||
counter[tableIdx] ++;
|
||||
}
|
||||
|
||||
btSwap( src, dst );
|
||||
b3Swap( src, dst );
|
||||
count++;
|
||||
}
|
||||
|
||||
if (count&1)
|
||||
{
|
||||
btAssert(0);//need to copy
|
||||
b3Assert(0);//need to copy
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void btRadixSort32CL::executeHost(btOpenCLArray<btSortData>& keyValuesInOut, int sortBits /* = 32 */)
|
||||
void b3RadixSort32CL::executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits /* = 32 */)
|
||||
{
|
||||
|
||||
b3AlignedObjectArray<btSortData> inout;
|
||||
b3AlignedObjectArray<b3SortData> inout;
|
||||
keyValuesInOut.copyToHost(inout);
|
||||
|
||||
executeHost(inout,sortBits);
|
||||
@@ -174,8 +174,8 @@ void btRadixSort32CL::executeHost(btOpenCLArray<btSortData>& keyValuesInOut, int
|
||||
keyValuesInOut.copyFromHost(inout);
|
||||
}
|
||||
|
||||
void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysIn, btOpenCLArray<unsigned int>& keysOut, btOpenCLArray<unsigned int>& valuesIn,
|
||||
btOpenCLArray<unsigned int>& valuesOut, int n, int sortBits)
|
||||
void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn,
|
||||
b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits)
|
||||
{
|
||||
|
||||
}
|
||||
@@ -184,7 +184,7 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysIn, btOpenCLArray
|
||||
//#define DEBUG_RADIXSORT2
|
||||
|
||||
|
||||
void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sortBits /* = 32 */)
|
||||
void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits /* = 32 */)
|
||||
{
|
||||
|
||||
int originalSize = keyValuesInOut.size();
|
||||
@@ -194,7 +194,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
int dataAlignment = DATA_ALIGNMENT;
|
||||
|
||||
#ifdef DEBUG_RADIXSORT2
|
||||
b3AlignedObjectArray<btSortData> test2;
|
||||
b3AlignedObjectArray<b3SortData> test2;
|
||||
keyValuesInOut.copyToHost(test2);
|
||||
printf("numElem = %d\n",test2.size());
|
||||
for (int i=0;i<test2.size();i++)
|
||||
@@ -204,20 +204,20 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
}
|
||||
#endif //DEBUG_RADIXSORT2
|
||||
|
||||
btOpenCLArray<btSortData>* src = 0;
|
||||
b3OpenCLArray<b3SortData>* src = 0;
|
||||
|
||||
if (workingSize%dataAlignment)
|
||||
{
|
||||
workingSize += dataAlignment-(workingSize%dataAlignment);
|
||||
m_workBuffer4->copyFromOpenCLArray(keyValuesInOut);
|
||||
m_workBuffer4->resize(workingSize);
|
||||
btSortData fillValue;
|
||||
b3SortData fillValue;
|
||||
fillValue.m_key = 0xffffffff;
|
||||
fillValue.m_value = 0xffffffff;
|
||||
|
||||
#define USE_BTFILL
|
||||
#ifdef USE_BTFILL
|
||||
m_fill->execute((btOpenCLArray<btInt2>&)*m_workBuffer4,(btInt2&)fillValue,workingSize-originalSize,originalSize);
|
||||
m_fill->execute((b3OpenCLArray<b3Int2>&)*m_workBuffer4,(b3Int2&)fillValue,workingSize-originalSize,originalSize);
|
||||
#else
|
||||
//fill the remaining bits (very slow way, todo: fill on GPU/OpenCL side)
|
||||
|
||||
@@ -234,7 +234,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
m_workBuffer4->resize(0);
|
||||
}
|
||||
|
||||
btAssert( workingSize%DATA_ALIGNMENT == 0 );
|
||||
b3Assert( workingSize%DATA_ALIGNMENT == 0 );
|
||||
int minCap = NUM_BUCKET*NUM_WGS;
|
||||
|
||||
|
||||
@@ -245,20 +245,20 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
|
||||
// ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 );
|
||||
btAssert( BITS_PER_PASS == 4 );
|
||||
btAssert( WG_SIZE == 64 );
|
||||
btAssert( (sortBits&0x3) == 0 );
|
||||
b3Assert( BITS_PER_PASS == 4 );
|
||||
b3Assert( WG_SIZE == 64 );
|
||||
b3Assert( (sortBits&0x3) == 0 );
|
||||
|
||||
|
||||
|
||||
btOpenCLArray<btSortData>* dst = m_workBuffer3;
|
||||
b3OpenCLArray<b3SortData>* dst = m_workBuffer3;
|
||||
|
||||
btOpenCLArray<unsigned int>* srcHisto = m_workBuffer1;
|
||||
btOpenCLArray<unsigned int>* destHisto = m_workBuffer2;
|
||||
b3OpenCLArray<unsigned int>* srcHisto = m_workBuffer1;
|
||||
b3OpenCLArray<unsigned int>* destHisto = m_workBuffer2;
|
||||
|
||||
|
||||
int nWGs = NUM_WGS;
|
||||
btConstData cdata;
|
||||
b3ConstData cdata;
|
||||
|
||||
{
|
||||
int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;//set at 256
|
||||
@@ -294,10 +294,10 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
if (src->size())
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src->getBufferCL(), true ), btBufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
btLauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel);
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel);
|
||||
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
|
||||
int num = NUM_WGS*WG_SIZE;
|
||||
@@ -328,9 +328,9 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
if (fastScan)
|
||||
{// prefix scan group histogram
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
btLauncherCL launcher( m_commandQueue, m_prefixScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( 128, 128 );
|
||||
destHisto = srcHisto;
|
||||
@@ -362,9 +362,9 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
if (src->size())
|
||||
{// local sort and distribute
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src->getBufferCL(), true ), btBufferInfoCL( destHisto->getBufferCL(), true ), btBufferInfoCL( dst->getBufferCL() )};
|
||||
btLauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )};
|
||||
b3LauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nWGs*WG_SIZE, WG_SIZE );
|
||||
|
||||
@@ -379,8 +379,8 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
int startBit = ib;
|
||||
|
||||
destHisto->copyToHost(testHist);
|
||||
b3AlignedObjectArray<btSortData> srcHost;
|
||||
b3AlignedObjectArray<btSortData> dstHost;
|
||||
b3AlignedObjectArray<b3SortData> srcHost;
|
||||
b3AlignedObjectArray<b3SortData> dstHost;
|
||||
dstHost.resize(src->size());
|
||||
|
||||
src->copyToHost(srcHost);
|
||||
@@ -405,11 +405,11 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
int counter2[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
||||
|
||||
int tables[NUM_TABLES];
|
||||
b3AlignedObjectArray<btSortData> dstHostOK;
|
||||
b3AlignedObjectArray<b3SortData> dstHostOK;
|
||||
dstHostOK.resize(src->size());
|
||||
|
||||
destHisto->copyToHost(testHist);
|
||||
b3AlignedObjectArray<btSortData> srcHost;
|
||||
b3AlignedObjectArray<b3SortData> srcHost;
|
||||
src->copyToHost(srcHost);
|
||||
|
||||
int blockSize = 256;
|
||||
@@ -435,7 +435,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
}
|
||||
|
||||
|
||||
b3AlignedObjectArray<btSortData> dstHost;
|
||||
b3AlignedObjectArray<b3SortData> dstHost;
|
||||
dstHost.resize(src->size());
|
||||
|
||||
|
||||
@@ -449,7 +449,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;
|
||||
|
||||
for(int iblock=0; iblock<btMin(cdata.m_nBlocksPerWG, nBlocks); iblock++)
|
||||
for(int iblock=0; iblock<b3Min(cdata.m_nBlocksPerWG, nBlocks); iblock++)
|
||||
{
|
||||
for (int lIdx = 0;lIdx < 64;lIdx++)
|
||||
{
|
||||
@@ -470,7 +470,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
int destIndex = testHist[tableIdx*NUM_WGS+wgIdx] + counter[tableIdx];
|
||||
|
||||
btSortData ok = dstHostOK[destIndex];
|
||||
b3SortData ok = dstHostOK[destIndex];
|
||||
|
||||
if (ok.m_key != srcHost[i].m_key)
|
||||
{
|
||||
@@ -512,8 +512,8 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
printf("testHist[%d]=%d\n",i,testHist[i]);
|
||||
}
|
||||
#endif //DEBUG_RADIXSORT
|
||||
btSwap(src, dst );
|
||||
btSwap(srcHisto,destHisto);
|
||||
b3Swap(src, dst );
|
||||
b3Swap(srcHisto,destHisto);
|
||||
|
||||
#ifdef DEBUG_RADIXSORT2
|
||||
keyValuesInOut.copyToHost(test2);
|
||||
@@ -537,7 +537,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
if (count&1)
|
||||
{
|
||||
btAssert(0);//need to copy from workbuffer to keyValuesInOut
|
||||
b3Assert(0);//need to copy from workbuffer to keyValuesInOut
|
||||
}
|
||||
|
||||
if (m_workBuffer4->size())
|
||||
@@ -565,7 +565,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
|
||||
|
||||
void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBits /* = 32 */)
|
||||
void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits /* = 32 */)
|
||||
{
|
||||
int originalSize = keysInOut.size();
|
||||
int workingSize = originalSize;
|
||||
@@ -573,7 +573,7 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
|
||||
int dataAlignment = DATA_ALIGNMENT;
|
||||
|
||||
btOpenCLArray<unsigned int>* src = 0;
|
||||
b3OpenCLArray<unsigned int>* src = 0;
|
||||
|
||||
if (workingSize%dataAlignment)
|
||||
{
|
||||
@@ -593,7 +593,7 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
|
||||
|
||||
|
||||
btAssert( workingSize%DATA_ALIGNMENT == 0 );
|
||||
b3Assert( workingSize%DATA_ALIGNMENT == 0 );
|
||||
int minCap = NUM_BUCKET*NUM_WGS;
|
||||
|
||||
|
||||
@@ -605,20 +605,20 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
m_workBuffer3a->resize(workingSize);
|
||||
|
||||
// ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 );
|
||||
btAssert( BITS_PER_PASS == 4 );
|
||||
btAssert( WG_SIZE == 64 );
|
||||
btAssert( (sortBits&0x3) == 0 );
|
||||
b3Assert( BITS_PER_PASS == 4 );
|
||||
b3Assert( WG_SIZE == 64 );
|
||||
b3Assert( (sortBits&0x3) == 0 );
|
||||
|
||||
|
||||
|
||||
btOpenCLArray<unsigned int>* dst = m_workBuffer3a;
|
||||
b3OpenCLArray<unsigned int>* dst = m_workBuffer3a;
|
||||
|
||||
btOpenCLArray<unsigned int>* srcHisto = m_workBuffer1;
|
||||
btOpenCLArray<unsigned int>* destHisto = m_workBuffer2;
|
||||
b3OpenCLArray<unsigned int>* srcHisto = m_workBuffer1;
|
||||
b3OpenCLArray<unsigned int>* destHisto = m_workBuffer2;
|
||||
|
||||
|
||||
int nWGs = NUM_WGS;
|
||||
btConstData cdata;
|
||||
b3ConstData cdata;
|
||||
|
||||
{
|
||||
int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;//set at 256
|
||||
@@ -641,10 +641,10 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
|
||||
if (src->size())
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src->getBufferCL(), true ), btBufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
btLauncherCL launcher(m_commandQueue, m_streamCountKernel);
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
b3LauncherCL launcher(m_commandQueue, m_streamCountKernel);
|
||||
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
|
||||
int num = NUM_WGS*WG_SIZE;
|
||||
@@ -663,9 +663,9 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
|
||||
if (fastScan)
|
||||
{// prefix scan group histogram
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
btLauncherCL launcher( m_commandQueue, m_prefixScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( 128, 128 );
|
||||
destHisto = srcHisto;
|
||||
@@ -677,23 +677,23 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
|
||||
if (src->size())
|
||||
{// local sort and distribute
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src->getBufferCL(), true ), btBufferInfoCL( destHisto->getBufferCL(), true ), btBufferInfoCL( dst->getBufferCL() )};
|
||||
btLauncherCL launcher( m_commandQueue, m_sortAndScatterKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )};
|
||||
b3LauncherCL launcher( m_commandQueue, m_sortAndScatterKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nWGs*WG_SIZE, WG_SIZE );
|
||||
|
||||
}
|
||||
|
||||
btSwap(src, dst );
|
||||
btSwap(srcHisto,destHisto);
|
||||
b3Swap(src, dst );
|
||||
b3Swap(srcHisto,destHisto);
|
||||
|
||||
count++;
|
||||
}
|
||||
|
||||
if (count&1)
|
||||
{
|
||||
btAssert(0);//need to copy from workbuffer to keyValuesInOut
|
||||
b3Assert(0);//need to copy from workbuffer to keyValuesInOut
|
||||
}
|
||||
|
||||
if (m_workBuffer4a->size())
|
||||
85
opencl/parallel_primitives/host/b3RadixSort32CL.h
Normal file
85
opencl/parallel_primitives/host/b3RadixSort32CL.h
Normal file
@@ -0,0 +1,85 @@
|
||||
|
||||
#ifndef B3_RADIXSORT32_H
|
||||
#define B3_RADIXSORT32_H
|
||||
|
||||
#include "b3OpenCLArray.h"
|
||||
|
||||
struct b3SortData
|
||||
{
|
||||
int m_key;
|
||||
int m_value;
|
||||
};
|
||||
#include "b3BufferInfoCL.h"
|
||||
|
||||
class b3RadixSort32CL
|
||||
{
|
||||
|
||||
b3OpenCLArray<unsigned int>* m_workBuffer1;
|
||||
b3OpenCLArray<unsigned int>* m_workBuffer2;
|
||||
|
||||
b3OpenCLArray<b3SortData>* m_workBuffer3;
|
||||
b3OpenCLArray<b3SortData>* m_workBuffer4;
|
||||
|
||||
b3OpenCLArray<unsigned int>* m_workBuffer3a;
|
||||
b3OpenCLArray<unsigned int>* m_workBuffer4a;
|
||||
|
||||
cl_command_queue m_commandQueue;
|
||||
|
||||
cl_kernel m_streamCountSortDataKernel;
|
||||
cl_kernel m_streamCountKernel;
|
||||
|
||||
cl_kernel m_prefixScanKernel;
|
||||
cl_kernel m_sortAndScatterSortDataKernel;
|
||||
cl_kernel m_sortAndScatterKernel;
|
||||
|
||||
|
||||
bool m_deviceCPU;
|
||||
|
||||
class b3PrefixScanCL* m_scan;
|
||||
class b3FillCL* m_fill;
|
||||
|
||||
public:
|
||||
struct b3ConstData
|
||||
{
|
||||
int m_n;
|
||||
int m_nWGs;
|
||||
int m_startBit;
|
||||
int m_nBlocksPerWG;
|
||||
};
|
||||
enum
|
||||
{
|
||||
DATA_ALIGNMENT = 256,
|
||||
WG_SIZE = 64,
|
||||
BLOCK_SIZE = 256,
|
||||
ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE/WG_SIZE),
|
||||
BITS_PER_PASS = 4,
|
||||
NUM_BUCKET=(1<<BITS_PER_PASS),
|
||||
// if you change this, change nPerWI in kernel as well
|
||||
NUM_WGS = 20*6, // cypress
|
||||
// NUM_WGS = 24*6, // cayman
|
||||
// NUM_WGS = 32*4, // nv
|
||||
};
|
||||
|
||||
|
||||
private:
|
||||
|
||||
|
||||
public:
|
||||
|
||||
b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity =0);
|
||||
|
||||
virtual ~b3RadixSort32CL();
|
||||
|
||||
void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn,
|
||||
b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32);
|
||||
|
||||
///keys only
|
||||
void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32 );
|
||||
|
||||
void execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32 );
|
||||
void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32);
|
||||
void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32);
|
||||
|
||||
};
|
||||
#endif //B3_RADIXSORT32_H
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
|
||||
#ifndef BT_BUFFER_INFO_CL_H
|
||||
#define BT_BUFFER_INFO_CL_H
|
||||
|
||||
#include "btOpenCLArray.h"
|
||||
|
||||
|
||||
struct btBufferInfoCL
|
||||
{
|
||||
//btBufferInfoCL(){}
|
||||
|
||||
// template<typename T>
|
||||
btBufferInfoCL(cl_mem buff, bool isReadOnly = false): m_clBuffer(buff), m_isReadOnly(isReadOnly){}
|
||||
|
||||
cl_mem m_clBuffer;
|
||||
bool m_isReadOnly;
|
||||
};
|
||||
|
||||
#endif //BT_BUFFER_INFO_CL_H
|
||||
@@ -1,63 +0,0 @@
|
||||
#ifndef BT_FILL_CL_H
|
||||
#define BT_FILL_CL_H
|
||||
|
||||
#include "btOpenCLArray.h"
|
||||
#include "Bullet3Common/b3Scalar.h"
|
||||
|
||||
#include "btInt2.h"
|
||||
#include "btInt4.h"
|
||||
|
||||
|
||||
class btFillCL
|
||||
{
|
||||
|
||||
cl_command_queue m_commandQueue;
|
||||
|
||||
cl_kernel m_fillKernelInt2;
|
||||
cl_kernel m_fillIntKernel;
|
||||
cl_kernel m_fillUnsignedIntKernel;
|
||||
cl_kernel m_fillFloatKernel;
|
||||
|
||||
public:
|
||||
|
||||
struct btConstData
|
||||
{
|
||||
union
|
||||
{
|
||||
btInt4 m_data;
|
||||
btUnsignedInt4 m_UnsignedData;
|
||||
};
|
||||
int m_offset;
|
||||
int m_n;
|
||||
int m_padding[2];
|
||||
};
|
||||
|
||||
protected:
|
||||
|
||||
public:
|
||||
|
||||
btFillCL(cl_context ctx, cl_device_id device, cl_command_queue queue);
|
||||
|
||||
virtual ~btFillCL();
|
||||
|
||||
void execute(btOpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset = 0);
|
||||
|
||||
void execute(btOpenCLArray<int>& src, const int value, int n, int offset = 0);
|
||||
|
||||
void execute(btOpenCLArray<float>& src, const float value, int n, int offset = 0);
|
||||
|
||||
void execute(btOpenCLArray<btInt2>& src, const btInt2& value, int n, int offset = 0);
|
||||
|
||||
void executeHost(b3AlignedObjectArray<btInt2> &src, const btInt2 &value, int n, int offset);
|
||||
|
||||
void executeHost(b3AlignedObjectArray<int> &src, const int value, int n, int offset);
|
||||
|
||||
// void execute(btOpenCLArray<btInt4>& src, const btInt4& value, int n, int offset = 0);
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif //BT_FILL_CL_H
|
||||
@@ -1,85 +0,0 @@
|
||||
|
||||
#ifndef BT_RADIXSORT32_H
|
||||
#define BT_RADIXSORT32_H
|
||||
|
||||
#include "btOpenCLArray.h"
|
||||
|
||||
// Key/value record made of two ints; used below as the element type of the
// btOpenCLArray<btSortData> / b3AlignedObjectArray<btSortData> arguments of btRadixSort32CL.
struct btSortData
|
||||
{
|
||||
int m_key;
|
||||
int m_value;
|
||||
};
|
||||
#include "btBufferInfoCL.h"
|
||||
|
||||
// Declares a radix-sort helper that owns pointers to several OpenCL work arrays, five kernel
// handles, and pointers to a btPrefixScanCL and a btFillCL helper.
// NOTE(review): only declarations are visible here; behaviour of execute()/executeHost() must be
// confirmed against btRadixSort32CL.cpp.
class btRadixSort32CL
|
||||
{
|
||||
|
||||
// Work arrays held by pointer: unsigned-int arrays (1, 2, 3a, 4a) and btSortData arrays (3, 4).
// NOTE(review): allocation and ownership are not visible in this header - confirm in the .cpp.
btOpenCLArray<unsigned int>* m_workBuffer1;
|
||||
btOpenCLArray<unsigned int>* m_workBuffer2;
|
||||
|
||||
btOpenCLArray<btSortData>* m_workBuffer3;
|
||||
btOpenCLArray<btSortData>* m_workBuffer4;
|
||||
|
||||
btOpenCLArray<unsigned int>* m_workBuffer3a;
|
||||
btOpenCLArray<unsigned int>* m_workBuffer4a;
|
||||
|
||||
cl_command_queue m_commandQueue;
|
||||
|
||||
// Kernel handles; the names suggest separate variants for btSortData records and for plain keys - TODO confirm.
cl_kernel m_streamCountSortDataKernel;
|
||||
cl_kernel m_streamCountKernel;
|
||||
|
||||
cl_kernel m_prefixScanKernel;
|
||||
cl_kernel m_sortAndScatterSortDataKernel;
|
||||
cl_kernel m_sortAndScatterKernel;
|
||||
|
||||
|
||||
// NOTE(review): presumably true when the target device is a CPU - confirm where it is set.
bool m_deviceCPU;
|
||||
|
||||
// The elaborated `class X*` form declares the helper types in place, so only pointers to them are needed here.
class btPrefixScanCL* m_scan;
|
||||
class btFillCL* m_fill;
|
||||
|
||||
public:
|
||||
// Plain record of four ints (element count, work-group count, start bit, blocks per work-group, going by the names).
// NOTE(review): presumably passed to the kernels as constant data, so member order matters - confirm.
struct btConstData
|
||||
{
|
||||
int m_n;
|
||||
int m_nWGs;
|
||||
int m_startBit;
|
||||
int m_nBlocksPerWG;
|
||||
};
|
||||
// Compile-time tuning constants. With the values below, ELEMENTS_PER_WORK_ITEM evaluates to 4
// (256/64), NUM_BUCKET to 16 (1<<4) and NUM_WGS to 120 (20*6).
enum
|
||||
{
|
||||
DATA_ALIGNMENT = 256,
|
||||
WG_SIZE = 64,
|
||||
BLOCK_SIZE = 256,
|
||||
ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE/WG_SIZE),
|
||||
BITS_PER_PASS = 4,
|
||||
NUM_BUCKET=(1<<BITS_PER_PASS),
|
||||
// if you change this, change nPerWI in kernel as well
|
||||
NUM_WGS = 20*6, // cypress
|
||||
// NUM_WGS = 24*6, // cayman
|
||||
// NUM_WGS = 32*4, // nv
|
||||
};
|
||||
|
||||
|
||||
// This private section is empty; the next access specifier immediately switches back to public.
private:
|
||||
|
||||
|
||||
public:
|
||||
|
||||
// Takes an OpenCL context, device and command queue; initialCapacity defaults to 0 (definition not visible here).
btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity =0);
|
||||
|
||||
// Virtual destructor (definition not visible here).
virtual ~btRadixSort32CL();
|
||||
|
||||
// Separate key and value arrays, in and out; sortBits defaults to 32.
void execute(btOpenCLArray<unsigned int>& keysIn, btOpenCLArray<unsigned int>& keysOut, btOpenCLArray<unsigned int>& valuesIn,
|
||||
btOpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32);
|
||||
|
||||
///keys only
|
||||
void execute(btOpenCLArray<unsigned int>& keysInOut, int sortBits = 32 );
|
||||
|
||||
// Overloads taking a single in/out array of btSortData records; sortBits defaults to 32 on each.
// The executeHost variants accept either a device array or a host b3AlignedObjectArray.
void execute(btOpenCLArray<btSortData>& keyValuesInOut, int sortBits = 32 );
|
||||
void executeHost(btOpenCLArray<btSortData>& keyValuesInOut, int sortBits = 32);
|
||||
void executeHost(b3AlignedObjectArray<btSortData>& keyValuesInOut, int sortBits = 32);
|
||||
|
||||
};
|
||||
#endif //BT_RADIXSORT32_H
|
||||
|
||||
@@ -15,10 +15,10 @@ subject to the following restrictions:
|
||||
|
||||
#include <stdio.h>
|
||||
#include "../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "../host/btFillCL.h"
|
||||
#include "../host/btBoundSearchCL.h"
|
||||
#include "../host/btRadixSort32CL.h"
|
||||
#include "../host/btPrefixScanCL.h"
|
||||
#include "../host/b3FillCL.h"
|
||||
#include "../host/b3BoundSearchCL.h"
|
||||
#include "../host/b3RadixSort32CL.h"
|
||||
#include "../host/b3PrefixScanCL.h"
|
||||
#include "Bullet3Common/b3CommandLineArgs.h"
|
||||
#include "Bullet3Common/b3MinMax.h"
|
||||
|
||||
@@ -50,7 +50,7 @@ void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
|
||||
int numDev = b3OpenCLUtils::getNumDevices(g_context);
|
||||
if (numDev>0)
|
||||
{
|
||||
btOpenCLDeviceInfo info;
|
||||
b3OpenCLDeviceInfo info;
|
||||
g_device= b3OpenCLUtils::getDevice(g_context,0);
|
||||
g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
@@ -71,9 +71,9 @@ inline void fillIntTest()
|
||||
{
|
||||
TEST_INIT;
|
||||
|
||||
btFillCL* fillCL = new btFillCL(g_context,g_device,g_queue);
|
||||
b3FillCL* fillCL = new b3FillCL(g_context,g_device,g_queue);
|
||||
int maxSize=1024*256;
|
||||
btOpenCLArray<int> intBuffer(g_context,g_queue,maxSize);
|
||||
b3OpenCLArray<int> intBuffer(g_context,g_queue,maxSize);
|
||||
intBuffer.resize(maxSize);
|
||||
|
||||
#define NUM_TESTS 7
|
||||
@@ -81,7 +81,7 @@ inline void fillIntTest()
|
||||
int dx = maxSize/NUM_TESTS;
|
||||
for (int iter=0;iter<NUM_TESTS;iter++)
|
||||
{
|
||||
int size = btMin( 11+dx*iter, maxSize );
|
||||
int size = b3Min( 11+dx*iter, maxSize );
|
||||
|
||||
int value = 2;
|
||||
|
||||
@@ -126,9 +126,9 @@ T getRandom(const T& minV, const T& maxV)
|
||||
return (T)(minV + r*range);
|
||||
}
|
||||
|
||||
struct btSortDataCompare
|
||||
struct b3SortDataCompare
|
||||
{
|
||||
inline bool operator()(const btSortData& first, const btSortData& second) const
|
||||
inline bool operator()(const b3SortData& first, const b3SortData& second) const
|
||||
{
|
||||
return (first.m_key < second.m_key) || (first.m_key==second.m_key && first.m_value < second.m_value);
|
||||
}
|
||||
@@ -142,24 +142,24 @@ void boundSearchTest( )
|
||||
int maxSize = 1024*256;
|
||||
int bucketSize = 256;
|
||||
|
||||
btOpenCLArray<btSortData> srcCL(g_context,g_queue,maxSize);
|
||||
btOpenCLArray<unsigned int> upperCL(g_context,g_queue,maxSize);
|
||||
btOpenCLArray<unsigned int> lowerCL(g_context,g_queue,maxSize);
|
||||
b3OpenCLArray<b3SortData> srcCL(g_context,g_queue,maxSize);
|
||||
b3OpenCLArray<unsigned int> upperCL(g_context,g_queue,maxSize);
|
||||
b3OpenCLArray<unsigned int> lowerCL(g_context,g_queue,maxSize);
|
||||
|
||||
b3AlignedObjectArray<btSortData> srcHost;
|
||||
b3AlignedObjectArray<b3SortData> srcHost;
|
||||
b3AlignedObjectArray<unsigned int> upperHost;
|
||||
b3AlignedObjectArray<unsigned int> lowerHost;
|
||||
b3AlignedObjectArray<unsigned int> upperHostCompare;
|
||||
b3AlignedObjectArray<unsigned int> lowerHostCompare;
|
||||
|
||||
btBoundSearchCL* search = new btBoundSearchCL(g_context,g_device,g_queue, maxSize);
|
||||
b3BoundSearchCL* search = new b3BoundSearchCL(g_context,g_device,g_queue, maxSize);
|
||||
|
||||
|
||||
int dx = maxSize/NUM_TESTS;
|
||||
for(int iter=0; iter<NUM_TESTS; iter++)
|
||||
{
|
||||
|
||||
int size = btMin( 128+dx*iter, maxSize );
|
||||
int size = b3Min( 128+dx*iter, maxSize );
|
||||
|
||||
upperHost.resize(bucketSize);
|
||||
lowerHost.resize(bucketSize);
|
||||
@@ -170,7 +170,7 @@ void boundSearchTest( )
|
||||
|
||||
for(int i=0; i<size; i++)
|
||||
{
|
||||
btSortData v;
|
||||
b3SortData v;
|
||||
// v.m_key = i<2? 0 : 5;
|
||||
v.m_key = getRandom(0,bucketSize);
|
||||
|
||||
@@ -178,7 +178,7 @@ void boundSearchTest( )
|
||||
srcHost.at(i) = v;
|
||||
}
|
||||
|
||||
srcHost.quickSort(btSortDataCompare());
|
||||
srcHost.quickSort(b3SortDataCompare());
|
||||
srcCL.copyFromHost(srcHost);
|
||||
|
||||
{
|
||||
@@ -194,11 +194,11 @@ void boundSearchTest( )
|
||||
lowerCL.copyFromHost(lowerHost);
|
||||
}
|
||||
|
||||
search->execute(srcCL,size,upperCL,bucketSize,btBoundSearchCL::BOUND_UPPER);
|
||||
search->execute(srcCL,size,lowerCL,bucketSize,btBoundSearchCL::BOUND_LOWER);
|
||||
search->execute(srcCL,size,upperCL,bucketSize,b3BoundSearchCL::BOUND_UPPER);
|
||||
search->execute(srcCL,size,lowerCL,bucketSize,b3BoundSearchCL::BOUND_LOWER);
|
||||
|
||||
search->executeHost(srcHost,size,upperHostCompare,bucketSize,btBoundSearchCL::BOUND_UPPER);
|
||||
search->executeHost(srcHost,size,lowerHostCompare,bucketSize,btBoundSearchCL::BOUND_LOWER);
|
||||
search->executeHost(srcHost,size,upperHostCompare,bucketSize,b3BoundSearchCL::BOUND_UPPER);
|
||||
search->executeHost(srcHost,size,lowerHostCompare,bucketSize,b3BoundSearchCL::BOUND_LOWER);
|
||||
|
||||
lowerCL.copyToHost(lowerHost);
|
||||
upperCL.copyToHost(upperHost);
|
||||
@@ -263,16 +263,16 @@ void prefixScanTest()
|
||||
b3AlignedObjectArray<unsigned int> buf0Host;
|
||||
b3AlignedObjectArray<unsigned int> buf1Host;
|
||||
|
||||
btOpenCLArray<unsigned int> buf2CL(g_context,g_queue,maxSize);
|
||||
btOpenCLArray<unsigned int> buf3CL(g_context,g_queue,maxSize);
|
||||
b3OpenCLArray<unsigned int> buf2CL(g_context,g_queue,maxSize);
|
||||
b3OpenCLArray<unsigned int> buf3CL(g_context,g_queue,maxSize);
|
||||
|
||||
|
||||
btPrefixScanCL* scan = new btPrefixScanCL(g_context,g_device,g_queue,maxSize);
|
||||
b3PrefixScanCL* scan = new b3PrefixScanCL(g_context,g_device,g_queue,maxSize);
|
||||
|
||||
int dx = maxSize/NUM_TESTS;
|
||||
for(int iter=0; iter<NUM_TESTS; iter++)
|
||||
{
|
||||
int size = btMin( 128+dx*iter, maxSize );
|
||||
int size = b3Min( 128+dx*iter, maxSize );
|
||||
buf0Host.resize(size);
|
||||
buf1Host.resize(size);
|
||||
|
||||
@@ -305,25 +305,25 @@ bool radixSortTest()
|
||||
|
||||
int maxSize = 1024*256;
|
||||
|
||||
b3AlignedObjectArray<btSortData> buf0Host;
|
||||
b3AlignedObjectArray<b3SortData> buf0Host;
|
||||
buf0Host.resize(maxSize);
|
||||
b3AlignedObjectArray<btSortData> buf1Host;
|
||||
b3AlignedObjectArray<b3SortData> buf1Host;
|
||||
buf1Host.resize(maxSize );
|
||||
btOpenCLArray<btSortData> buf2CL(g_context,g_queue,maxSize);
|
||||
b3OpenCLArray<b3SortData> buf2CL(g_context,g_queue,maxSize);
|
||||
|
||||
btRadixSort32CL* sort = new btRadixSort32CL(g_context,g_device,g_queue,maxSize);
|
||||
b3RadixSort32CL* sort = new b3RadixSort32CL(g_context,g_device,g_queue,maxSize);
|
||||
|
||||
int dx = maxSize/NUM_TESTS;
|
||||
for(int iter=0; iter<NUM_TESTS; iter++)
|
||||
{
|
||||
int size = btMin( 128+dx*iter, maxSize-512 );
|
||||
int size = b3Min( 128+dx*iter, maxSize-512 );
|
||||
size = NEXTMULTIPLEOF( size, 512 );//not necessary
|
||||
|
||||
buf0Host.resize(size);
|
||||
|
||||
for(int i=0; i<size; i++)
|
||||
{
|
||||
btSortData v;
|
||||
b3SortData v;
|
||||
v.m_key = getRandom(0,0xff);
|
||||
v.m_value = i;
|
||||
buf0Host[i] = v;
|
||||
|
||||
@@ -19,14 +19,14 @@ function createProject(vendor)
|
||||
"../../basic_initialize/b3OpenCLInclude.h",
|
||||
"../../basic_initialize/b3OpenCLUtils.cpp",
|
||||
"../../basic_initialize/b3OpenCLUtils.h",
|
||||
"../host/btFillCL.cpp",
|
||||
"../host/btFillCL.h",
|
||||
"../host/btBoundSearchCL.cpp",
|
||||
"../host/btBoundSearchCL.h",
|
||||
"../host/btPrefixScanCL.cpp",
|
||||
"../host/btPrefixScanCL.h",
|
||||
"../host/btRadixSort32CL.cpp",
|
||||
"../host/btRadixSort32CL.h",
|
||||
"../host/b3FillCL.cpp",
|
||||
"../host/b3FillCL.h",
|
||||
"../host/b3BoundSearchCL.cpp",
|
||||
"../host/b3BoundSearchCL.h",
|
||||
"../host/b3PrefixScanCL.cpp",
|
||||
"../host/b3PrefixScanCL.h",
|
||||
"../host/b3RadixSort32CL.cpp",
|
||||
"../host/b3RadixSort32CL.h",
|
||||
"../../../src/Bullet3Common/b3AlignedAllocator.cpp",
|
||||
"../../../src/Bullet3Common/b3AlignedAllocator.h",
|
||||
"../../../src/Bullet3Common/b3AlignedObjectArray.h",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
///original author: Erwin Coumans
|
||||
#include "b3OpenCLUtils.h"
|
||||
#include "../parallel_primitives/host/btOpenCLArray.h"
|
||||
#include "../parallel_primitives/host/btLauncherCL.h"
|
||||
#include "../parallel_primitives/host/b3OpenCLArray.h"
|
||||
#include "../parallel_primitives/host/b3LauncherCL.h"
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
@@ -58,8 +58,8 @@ int main(int argc, char* argv[])
|
||||
addKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"ReduceGlobal",&ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
int numElements = 1024*1024;
|
||||
btOpenCLArray<int> a(ctx,queue);
|
||||
btOpenCLArray<int> b(ctx,queue);
|
||||
b3OpenCLArray<int> a(ctx,queue);
|
||||
b3OpenCLArray<int> b(ctx,queue);
|
||||
b3AlignedObjectArray<int> hostA;
|
||||
b3AlignedObjectArray<int> hostB;
|
||||
|
||||
@@ -79,7 +79,7 @@ int main(int argc, char* argv[])
|
||||
b.resize(numElements);
|
||||
|
||||
{
|
||||
btLauncherCL launcher( queue, addKernel);
|
||||
b3LauncherCL launcher( queue, addKernel);
|
||||
launcher.setBuffer( a.getBufferCL());
|
||||
launcher.setBuffer( b.getBufferCL());
|
||||
launcher.setConst( numElements );
|
||||
@@ -87,7 +87,7 @@ int main(int argc, char* argv[])
|
||||
}
|
||||
clFinish(queue);
|
||||
{
|
||||
btLauncherCL launcher( queue, addKernel);
|
||||
b3LauncherCL launcher( queue, addKernel);
|
||||
launcher.setBuffer( b.getBufferCL());
|
||||
launcher.setBuffer( a.getBufferCL());
|
||||
launcher.setConst( 1024 );
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define GRID3DOCL_CHECKERROR(a, b) if((a)!=(b)) { printf("3D GRID OCL Error : %d\n", (a)); btAssert((a) == (b)); }
|
||||
#define GRID3DOCL_CHECKERROR(a, b) if((a)!=(b)) { printf("3D GRID OCL Error : %d\n", (a)); b3Assert((a) == (b)); }
|
||||
size_t wgSize;
|
||||
|
||||
#include "VectorAddKernels.h"
|
||||
@@ -334,7 +334,7 @@ int main(int argc, char **argv)
|
||||
else
|
||||
{
|
||||
size_t localWorkSize[2], globalWorkSize[2];
|
||||
//workgroupSize = btMin(workgroupSize, actualGlobalSize);
|
||||
//workgroupSize = b3Min(workgroupSize, actualGlobalSize);
|
||||
int num_t = actualGlobalSize / workgroupSize;
|
||||
int num_g = num_t * workgroupSize;
|
||||
if(num_g < actualGlobalSize)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
///original author: Erwin Coumans
|
||||
#include "b3OpenCLUtils.h"
|
||||
#include "../parallel_primitives/host/btOpenCLArray.h"
|
||||
#include "../parallel_primitives/host/btLauncherCL.h"
|
||||
#include "../parallel_primitives/host/b3OpenCLArray.h"
|
||||
#include "../parallel_primitives/host/b3LauncherCL.h"
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
@@ -42,9 +42,9 @@ int main(int argc, char* argv[])
|
||||
addKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"VectorAdd",&ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
int numElements = 32;
|
||||
btOpenCLArray<float> a(ctx,queue);
|
||||
btOpenCLArray<float> b(ctx,queue);
|
||||
btOpenCLArray<float> c(ctx,queue);
|
||||
b3OpenCLArray<float> a(ctx,queue);
|
||||
b3OpenCLArray<float> b(ctx,queue);
|
||||
b3OpenCLArray<float> c(ctx,queue);
|
||||
for (int i=0;i<numElements;i++)
|
||||
{
|
||||
a.push_back(float(i));
|
||||
@@ -52,7 +52,7 @@ int main(int argc, char* argv[])
|
||||
}
|
||||
|
||||
c.resize(numElements);
|
||||
btLauncherCL launcher( queue, addKernel);
|
||||
b3LauncherCL launcher( queue, addKernel);
|
||||
launcher.setBuffer( a.getBufferCL());
|
||||
launcher.setBuffer( b.getBufferCL());
|
||||
launcher.setBuffer( c.getBufferCL());
|
||||
|
||||
Reference in New Issue
Block a user