Fix out-of-bounds buffer access in the AMD version of VectorAdd
This commit is contained in:
@@ -300,9 +300,10 @@ int main(int argc, char **argv)
|
||||
{
|
||||
num_t++;
|
||||
//this can cause problems: the extra work-items may process data outside of the buffer
|
||||
//make sure the kernel checks its global index against the buffer size
|
||||
}
|
||||
|
||||
size_t globalThreads[] = {actualGlobalSize};//num_t * workgroupSize};
|
||||
size_t globalThreads[] = {num_t * workgroupSize};
|
||||
size_t localThreads[] = {workgroupSize};
|
||||
|
||||
|
||||
|
||||
@@ -24,6 +24,8 @@ __kernel void VectorAdd(__global const float8* a, __global const float8* b, __gl
|
||||
{
|
||||
// get oct-float index into global data array
|
||||
int iGID = get_global_id(0);
|
||||
if (iGID>=100000)
|
||||
return;
|
||||
|
||||
// read inputs into registers
|
||||
float8 f8InA = a[iGID];
|
||||
|
||||
@@ -17,6 +17,8 @@ subject to the following restrictions:
|
||||
#ifndef GEN_MINMAX_H
|
||||
#define GEN_MINMAX_H
|
||||
|
||||
#include "btScalar.h"
|
||||
|
||||
template <class T>
|
||||
SIMD_FORCE_INLINE const T& btMin(const T& a, const T& b)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user