Added the IBM Cell SDK 2.x software_cache to Bullet/Extras. It can optionally be enabled for the BulletMultiThreaded Cell version.
See USE_SOFTWARE_CACHE in Bullet\src\BulletMultiThreaded\SpuNarrowPhaseCollisionTask\SpuGatheringCollisionTask.cpp. It improves Bullet's midphase collision detection (triangle/vertex fetching). The license is CommonPublicLicense-1.0; see the included license docs.
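For context, a minimal sketch of the kind of fetch the midphase task can route through the cache added below; this is hypothetical, not the actual Bullet code, and the umbrella header name spe_cache.h and the helper spu_fetch_index are assumptions:

#include <spu_intrinsics.h>
#include "spe_cache.h"   /* assumed umbrella header for the software cache */

/* Read one 32-bit triangle index from main storage through the cache.
 * A hit is a plain local-store load; a miss triggers a single
 * cache-line DMA fill before the load. */
static inline unsigned int spu_fetch_index(unsigned int ea)
{
    return __spe_cache_rd(unsigned int, ea);
}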
Extras/software_cache/cache/include/nway-opt.h (vendored, normal file, 153 lines)
@@ -0,0 +1,153 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION                                */
/* This prolog section is automatically generated.                  */
/*                                                                  */
/* (C) Copyright 2001,2006,                                         */
/* International Business Machines Corporation,                     */
/*                                                                  */
/* All Rights Reserved.                                             */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx                                               */
/* nway-opt.h
 *
 * Copyright (C) 2006 IBM Corp.
 *
 * "Optimized" lookup operations for n-way set associative
 * software managed cache.
 */
#include <spu_intrinsics.h>

#ifndef __SPE_CACHE_NWAY_OPT_H_
#define __SPE_CACHE_NWAY_OPT_H_

/* __spe_cache_rd
 * Look up and return data from the cache. If the data
 * is not currently in cache then transfer it from main
 * storage.
 *
 * This code uses a conditional branch to the cache miss
 * handler in the event that the requested data is not
 * in the cache. A branch hint is used to avoid paying
 * the branch stall penalty.
 */
#define __spe_cache_rd(type, ea)                                \
({                                                              \
    int set, idx, lnum, byte;                                   \
    type ret;                                                   \
    _spe_cache_nway_lookup_(ea, set, idx);                      \
    if (unlikely(idx < 0)) {                                    \
        idx = _spe_cache_miss_(ea, set, -1);                    \
        spu_writech(22, SPE_CACHE_SET_TAGMASK(set));            \
        spu_mfcstat(MFC_TAG_UPDATE_ALL);                        \
    }                                                           \
    lnum = _spe_cacheline_num_(set, idx);                       \
    byte = _spe_cacheline_byte_offset_(ea);                     \
    ret = *((type *) (&spe_cache_mem[lnum + byte]));            \
    ret;                                                        \
})

/**
 * __spe_cache_rd_x4
 * Fetch four data elements from the cache.
 *
 * This code uses one conditional branch in
 * the event that any of the four elements
 * are missing.
 *
 * On a miss, light weight locking is used to
 * avoid casting out entries that were found.
 * Further, we wait just once for the transfers,
 * allowing for parallel [rather than serial]
 * transfers.
 */
#define __spe_cache_rd_x4(type, ea_x4)                          \
({                                                              \
    vector unsigned int missing;                                \
    unsigned int ms;                                            \
    vector unsigned int cindex;                                 \
    unsigned int d0, d1, d2, d3;                                \
    vector unsigned int s_x4;                                   \
    vector signed int i_x4;                                     \
    vector unsigned int ibyte, iline;                           \
    vector unsigned int ret;                                    \
    unsigned int idx0, idx1, idx2, idx3;                        \
                                                                \
    _spe_cache_nway_lookup_x4(ea_x4, s_x4, i_x4);               \
    missing = spu_rlmask((vector unsigned int)i_x4, -8);        \
    ms = spu_extract(spu_gather(missing), 0);                   \
                                                                \
    ibyte = _spe_cacheline_byte_offset_x4(ea_x4);               \
                                                                \
    iline = _spe_cacheline_num_x4(s_x4,                         \
                                  (vector unsigned int)i_x4);   \
                                                                \
    cindex = spu_add(iline, ibyte);                             \
                                                                \
    idx0 = spu_extract(cindex, 0);                              \
    idx1 = spu_extract(cindex, 1);                              \
    idx2 = spu_extract(cindex, 2);                              \
    idx3 = spu_extract(cindex, 3);                              \
                                                                \
    d0 = *((type *) (&spe_cache_mem[idx0]));                    \
    d1 = *((type *) (&spe_cache_mem[idx1]));                    \
    d2 = *((type *) (&spe_cache_mem[idx2]));                    \
    d3 = *((type *) (&spe_cache_mem[idx3]));                    \
                                                                \
    ret = _load_vec_uint4(d0, d1, d2, d3);                      \
                                                                \
    if (unlikely(ms)) {                                         \
        int b0 = spu_extract(ibyte, 0);                         \
        int b1 = spu_extract(ibyte, 1);                         \
        int b2 = spu_extract(ibyte, 2);                         \
        int b3 = spu_extract(ibyte, 3);                         \
        int lnum0;                                              \
        int lnum1;                                              \
        int lnum2;                                              \
        int lnum3;                                              \
        int s0 = spu_extract(s_x4, 0);                          \
        int s1 = spu_extract(s_x4, 1);                          \
        int s2 = spu_extract(s_x4, 2);                          \
        int s3 = spu_extract(s_x4, 3);                          \
        int i0 = spu_extract(i_x4, 0);                          \
        int i1 = spu_extract(i_x4, 1);                          \
        int i2 = spu_extract(i_x4, 2);                          \
        int i3 = spu_extract(i_x4, 3);                          \
        unsigned int ea0 = spu_extract(ea_x4, 0);               \
        unsigned int ea1 = spu_extract(ea_x4, 1);               \
        unsigned int ea2 = spu_extract(ea_x4, 2);               \
        unsigned int ea3 = spu_extract(ea_x4, 3);               \
        int avail = -1;                                         \
                                                                \
        avail &= ~(((i0 < 0) ? 0 : (1 << i0)) |                 \
                   ((i1 < 0) ? 0 : (1 << i1)) |                 \
                   ((i2 < 0) ? 0 : (1 << i2)) |                 \
                   ((i3 < 0) ? 0 : (1 << i3)));                 \
                                                                \
        i0 = _spe_cache_miss_(ea0, s0, avail);                  \
        avail &= ~(1 << i0);                                    \
        i1 = _spe_cache_miss_(ea1, s1, avail);                  \
        avail &= ~(1 << i1);                                    \
        i2 = _spe_cache_miss_(ea2, s2, avail);                  \
        avail &= ~(1 << i2);                                    \
        i3 = _spe_cache_miss_(ea3, s3, avail);                  \
                                                                \
        lnum0 = _spe_cacheline_num_(s0, i0);                    \
        lnum1 = _spe_cacheline_num_(s1, i1);                    \
        lnum2 = _spe_cacheline_num_(s2, i2);                    \
        lnum3 = _spe_cacheline_num_(s3, i3);                    \
                                                                \
        /* Wait once for all four line fills; combine the tag   \
         * masks of the four sets involved.                   */\
        spu_writech(22, SPE_CACHE_SET_TAGMASK(s0) |             \
                        SPE_CACHE_SET_TAGMASK(s1) |             \
                        SPE_CACHE_SET_TAGMASK(s2) |             \
                        SPE_CACHE_SET_TAGMASK(s3));             \
        spu_mfcstat(MFC_TAG_UPDATE_ALL);                        \
                                                                \
        d0 = *((type *) (&spe_cache_mem[lnum0 + b0]));          \
        d1 = *((type *) (&spe_cache_mem[lnum1 + b1]));          \
        d2 = *((type *) (&spe_cache_mem[lnum2 + b2]));          \
        d3 = *((type *) (&spe_cache_mem[lnum3 + b3]));          \
                                                                \
        ret = _load_vec_uint4(d0, d1, d2, d3);                  \
    }                                                           \
    ret;                                                        \
})

#endif /* __SPE_CACHE_NWAY_OPT_H_ */
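The x4 variant pays off when four reads are independent, such as gathering several triangle indices in one shot. A minimal usage sketch, assuming the same umbrella header; gather4 and the address arithmetic are illustrative assumptions, not taken from Bullet:

#include <spu_intrinsics.h>
#include "spe_cache.h"   /* assumed umbrella header */

/* Fetch four 32-bit values at base + the four byte offsets in ofs.
 * Misses are filled with overlapping DMAs, so a four-way miss
 * waits once rather than four times. */
static inline vector unsigned int
gather4(unsigned int base, vector unsigned int ofs)
{
    vector unsigned int ea_x4 = spu_add(spu_splats(base), ofs);
    return __spe_cache_rd_x4(unsigned int, ea_x4);
}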