fix: some files didn't have the svn:eol-style native property set yet

erwin.coumans
2010-03-06 15:23:36 +00:00
parent 4fd48ac691
commit 81f04a4d48
641 changed files with 301123 additions and 301123 deletions


@@ -1,28 +1,28 @@
# ---------------------------------------------------------------
# PLEASE DO NOT MODIFY THIS SECTION
# This prolog section is automatically generated.
#
# (C) Copyright 2001,2006,
# International Business Machines Corporation,
#
# All Rights Reserved.
# ---------------------------------------------------------------
# PROLOG END TAG zYx
########################################################################
# Common Makefile
########################################################################
INSTALL_DIR = $(SDKINC_spu)/cache
INSTALL_FILES := api.h \
defs.h \
dma.h \
nway.h \
nway-lookup.h \
nway-miss.h \
nway-opt.h \
nway-replace.h \
spe_cache.h
include ../../../../../make.footer


@@ -1,32 +1,32 @@
%% ---------------------------------------------------------------
%% PLEASE DO NOT MODIFY THIS SECTION
%% This prolog section is automatically generated.
%%
%% (C) Copyright 2001,2006,
%% International Business Machines Corporation,
%%
%% All Rights Reserved.
%% ---------------------------------------------------------------
%% PROLOG END TAG zYx
This directory contains an implementation of a software-managed cache for
the SPE. Whenever possible, the cache interfaces are implemented as macros
or inline-able functions.
Depending on compile-time settings, different cache implementations can
be selected.
The include file hierarchy is:
+ spe_cache.h Top level header.
|
+ defs.h Common definitions.
+ dma.h Initiate DMA transfers.
+ nway.h Top level n-way header.
|
+ nway-lookup.h n-way lookup operations.
+ nway-miss.h n-way cache miss handler.
+ nway-replace.h n-way cache replace handler.
+ nway-opt.h "optimized" n-way interfaces.
|
+ api.h Basic application interfaces.
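As a quick illustration of the compile-time selection mentioned above (an editor's sketch, not part of this commit; SPE_CACHE_NSETS and SPE_CACHELINE_SIZE are the knobs tested in defs.h below, and spe_cache.h is the top-level header per the hierarchy):

#define SPE_CACHE_NSETS     128   /* 128 sets instead of the default 64 */
#define SPE_CACHELINE_SIZE  256   /* 256-byte lines instead of the default 128 */
#include "spe_cache.h"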


@@ -1,31 +1,31 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* api.h
*
* Copyright (C) 2005 IBM Corp.
*
* Simple API for software managed cache on SPEs.
* A sophisticated application would not use these,
* but rather use the low-level lookup functions.
*/
#ifndef __SPE_CACHE_API_H__
#define __SPE_CACHE_API_H__
typedef void *spe_cache_entry_t;
#define spe_cache_rd(ea) _spe_cache_lookup_xfer_wait_(ea, 0, 1)
#define spe_cache_tr(ea) _spe_cache_lookup_xfer_(ea, 0, 1)
#define spe_cache_lr(ea) _spe_cache_lookup_(ea, 0)
#define spe_cache_wait(entry) _spe_cache_wait_(entry)
#endif
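A minimal usage sketch for the macros above (editor's illustration; it assumes the cache headers are configured and included, and that ea is a 32-bit effective address):

static __inline int cached_read_int(unsigned int ea)
{
    /* Look up ea, transfer the line on a miss, and wait for the DMA
     * to complete before dereferencing the local store address. */
    int *lsa = (int *) spe_cache_rd(ea);
    return *lsa;
}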


@@ -1,245 +1,245 @@
/* @(#)17 1.4 src/include/cbe_mfc.h, sw.includes, sdk_pub 10/11/05 16:00:25 */
/* -------------------------------------------------------------- */
/* (C) Copyright 2001,2005, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment Incorporated, */
/* Toshiba Corporation. */
/* */
/* All Rights Reserved. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifndef _CBEA_MFC_H_
#define _CBEA_MFC_H_
/* This header file contains various definitions related to the Memory Flow
* Controller (MFC) portion of the Cell Broadband Engine Architecture (CBEA).
*/
/**************************************/
/* MFC DMA Command Opcode Definitions */
/**************************************/
/****************************************************************************/
/* MFC DMA Command flags which identify classes of operations. */
/****************************************************************************/
/* Note: These flags may be used in conjunction with the base command types
* (i.e. MFC_PUT_CMD, MFC_PUTR_CMD, MFC_GET_CMD, and MFC_SNDSIG_CMD)
* to construct the various command permutations.
*/
#define MFC_BARRIER_ENABLE 0x01
#define MFC_FENCE_ENABLE 0x02
#define MFC_LIST_ENABLE 0x04 /* SPU Only */
#define MFC_START_ENABLE 0x08 /* proxy Only */
#define MFC_RESULT_ENABLE 0x10
/****************************************************************************/
/* MFC DMA Put Commands */
/****************************************************************************/
#define MFC_PUT_CMD 0x20
#define MFC_PUTS_CMD 0x28 /* proxy Only */
#define MFC_PUTR_CMD 0x30
#define MFC_PUTF_CMD 0x22
#define MFC_PUTB_CMD 0x21
#define MFC_PUTFS_CMD 0x2A /* proxy Only */
#define MFC_PUTBS_CMD 0x29 /* proxy Only */
#define MFC_PUTRF_CMD 0x32
#define MFC_PUTRB_CMD 0x31
#define MFC_PUTL_CMD 0x24 /* SPU Only */
#define MFC_PUTRL_CMD 0x34 /* SPU Only */
#define MFC_PUTLF_CMD 0x26 /* SPU Only */
#define MFC_PUTLB_CMD 0x25 /* SPU Only */
#define MFC_PUTRLF_CMD 0x36 /* SPU Only */
#define MFC_PUTRLB_CMD 0x35 /* SPU Only */
/****************************************************************************/
/* MFC DMA Get Commands */
/****************************************************************************/
#define MFC_GET_CMD 0x40
#define MFC_GETS_CMD 0x48 /* proxy Only */
#define MFC_GETF_CMD 0x42
#define MFC_GETB_CMD 0x41
#define MFC_GETFS_CMD 0x4A /* proxy Only */
#define MFC_GETBS_CMD 0x49 /* proxy Only */
#define MFC_GETL_CMD 0x44 /* SPU Only */
#define MFC_GETLF_CMD 0x46 /* SPU Only */
#define MFC_GETLB_CMD 0x45 /* SPU Only */
/****************************************************************************/
/* MFC DMA Storage Control Commands */
/****************************************************************************/
/* Note: These are only supported on implementations with an SL1 cache.
 * They are no-ops on the initial (CBE) implementation.
*/
#define MFC_SDCRT_CMD 0x80
#define MFC_SDCRTST_CMD 0x81
#define MFC_SDCRZ_CMD 0x89
#define MFC_SDCRS_CMD 0x8D
#define MFC_SDCRF_CMD 0x8F
/****************************************************************************/
/* MFC Synchronization Commands */
/****************************************************************************/
#define MFC_GETLLAR_CMD 0xD0 /* SPU Only */
#define MFC_PUTLLC_CMD 0xB4 /* SPU Only */
#define MFC_PUTLLUC_CMD 0xB0 /* SPU Only */
#define MFC_PUTQLLUC_CMD 0xB8 /* SPU Only */
#define MFC_SNDSIG_CMD 0xA0
#define MFC_SNDSIGB_CMD 0xA1
#define MFC_SNDSIGF_CMD 0xA2
#define MFC_BARRIER_CMD 0xC0
#define MFC_EIEIO_CMD 0xC8
#define MFC_SYNC_CMD 0xCC
/****************************************************************************/
/* Definitions for constructing a 32-bit command word including the transfer
* and replacement class id and the command opcode.
*/
/****************************************************************************/
#define MFC_TCLASS(_tid) ((_tid) << 24)
#define MFC_RCLASS(_rid) ((_rid) << 16)
#define MFC_CMD_WORD(_tid, _rid, _cmd) (MFC_TCLASS(_tid) | MFC_RCLASS(_rid) | (_cmd))
/****************************************************************************/
/* Definitions for constructing a 64-bit command word including the size, tag,
* transfer and replacement class id and the command opcode.
*/
/****************************************************************************/
#define MFC_SIZE(_size) ((unsigned long long)(_size) << 48)
#define MFC_TAG(_tag_id) ((unsigned long long)(_tag_id) << 32)
#define MFC_TR_CMD(_trcmd) ((unsigned long long)(_trcmd))
#define MFC_CMD_DWORD(_size, _tag_id, _trcmd) (MFC_SIZE(_size) | MFC_TAG(_tag_id) | MFC_TR_CMD(_trcmd))
/****************************************************************************/
/* Mask definitions for obtaining DMA commands and class ids from packed words.
*/
/****************************************************************************/
#define MFC_CMD_MASK 0x0000FFFF
#define MFC_CLASS_MASK 0x000000FF
/****************************************************************************/
/* DMA max/min size definitions. */
/****************************************************************************/
#define MFC_MIN_DMA_SIZE_SHIFT 4 /* 16 bytes */
#define MFC_MAX_DMA_SIZE_SHIFT 14 /* 16384 bytes */
#define MFC_MIN_DMA_SIZE (1 << MFC_MIN_DMA_SIZE_SHIFT)
#define MFC_MAX_DMA_SIZE (1 << MFC_MAX_DMA_SIZE_SHIFT)
#define MFC_MIN_DMA_SIZE_MASK (MFC_MIN_DMA_SIZE - 1)
#define MFC_MAX_DMA_SIZE_MASK (MFC_MAX_DMA_SIZE - 1)
#define MFC_MIN_DMA_LIST_SIZE 0x0008 /* 8 bytes */
#define MFC_MAX_DMA_LIST_SIZE 0x4000 /* 16K bytes */
/****************************************************************************/
/* Mask definition for checking proper address alignment. */
/****************************************************************************/
#define MFC_ADDR_MATCH_MASK 0xF
#define MFC_BEST_ADDR_ALIGNMENT 0x80
/****************************************************************************/
/* Definitions related to the Proxy DMA Command Status register (DMA_CMDStatus).
*/
/****************************************************************************/
#define MFC_PROXY_DMA_CMD_ENQUEUE_SUCCESSFUL 0x00
#define MFC_PROXY_DMA_CMD_SEQUENCE_ERROR 0x01
#define MFC_PROXY_DMA_QUEUE_FULL 0x02
/****************************************************************************/
/* Definitions related to the DMA Queue Status register (DMA_QStatus). */
/****************************************************************************/
#define MFC_PROXY_MAX_QUEUE_SPACE 0x08
#define MFC_PROXY_DMA_Q_EMPTY 0x80000000
#define MFC_PROXY_DMA_Q_FREE_SPACE_MASK 0x0000FFFF
#define MFC_SPU_MAX_QUEUE_SPACE 0x10
/****************************************************************************/
/* Definitions related to the Proxy Tag-Group Query-Type register
* (Prxy_QueryType).
*/
/****************************************************************************/
#define MFC_PROXY_DMA_QUERYTYPE_ANY 0x1
#define MFC_PROXY_DMA_QUERYTYPE_ALL 0x2
/****************************************************************************/
/* Definitions related to the Proxy Tag-Group Query-Mask (Prxy_QueryMask)
* and PU Tag Status (DMA_TagStatus) registers.
*
 * NOTE: Only the bottom 5 bits of the tag id value passed are used, to
 * ensure that a valid tag id results.
*/
/****************************************************************************/
#define MFC_TAGID_TO_TAGMASK(tag_id) (1 << (tag_id & 0x1F))
/****************************************************************************/
/* Definitions related to the Mailbox Status register (SPU_Mbox_Stat) and the
* depths of the outbound Mailbox Register (SPU_OutMbox), the outbound
* interrupting Mailbox Register (SPU_OutIntrMbox), and the inbound Mailbox
* Register (SPU_In_Mbox).
*/
/****************************************************************************/
#define MFC_SPU_OUT_MBOX_COUNT_STATUS_MASK 0x000000FF
#define MFC_SPU_OUT_MBOX_COUNT_STATUS_SHIFT 0x0
#define MFC_SPU_IN_MBOX_COUNT_STATUS_MASK 0x0000FF00
#define MFC_SPU_IN_MBOX_COUNT_STATUS_SHIFT 0x8
#define MFC_SPU_OUT_INTR_MBOX_COUNT_STATUS_MASK 0x00FF0000
#define MFC_SPU_OUT_INTR_MBOX_COUNT_STATUS_SHIFT 0x10
/****************************************************************************/
/* Definitions related to the SPC Multi Source Synchronization register
* (MFC_MSSync).
*/
/****************************************************************************/
#define MFC_SPC_MSS_STATUS_MASK 0x1
#define MFC_SPC_MSS_COMPLETE 0x0
#define MFC_SPC_MSS_NOT_COMPLETE 0x1
/*******************************************
* Channel Defines
*******************************************/
/* Events Defines for channels:
* 0 (SPU_RdEventStat),
* 1 (SPU_WrEventMask), and
* 2 (SPU_WrEventAck).
*/
#define MFC_TAG_STATUS_UPDATE_EVENT 0x00000001
#define MFC_LIST_STALL_NOTIFY_EVENT 0x00000002
#define MFC_COMMAND_QUEUE_AVAILABLE_EVENT 0x00000008
#define MFC_IN_MBOX_AVAILABLE_EVENT 0x00000010
#define MFC_DECREMENTER_EVENT 0x00000020
#define MFC_OUT_INTR_MBOX_AVAILABLE_EVENT 0x00000040
#define MFC_OUT_MBOX_AVAILABLE_EVENT 0x00000080
#define MFC_SIGNAL_NOTIFY_2_EVENT 0x00000100
#define MFC_SIGNAL_NOTIFY_1_EVENT 0x00000200
#define MFC_LLR_LOST_EVENT 0x00000400
#define MFC_PRIV_ATTN_EVENT 0x00000800
#define MFC_MULTI_SRC_SYNC_EVENT 0x00001000
/* Tag Status Update defines for channel 23 (MFC_WrTagUpdate)
*/
#define MFC_TAG_UPDATE_IMMEDIATE 0x0
#define MFC_TAG_UPDATE_ANY 0x1
#define MFC_TAG_UPDATE_ALL 0x2
/* Atomic Command Status defines for channel 27 (MFC_RdAtomicStat)
*/
#define MFC_PUTLLC_STATUS 0x00000001
#define MFC_PUTLLUC_STATUS 0x00000002
#define MFC_GETLLAR_STATUS 0x00000004
#endif /* _CBEA_MFC_H_ */
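To see how the packing macros above compose, here is an editor's example (not from the commit): a fenced GET of one 128-byte block on tag 5, with both class ids zero:

unsigned int cmd = MFC_CMD_WORD(0, 0, MFC_GETF_CMD);    /* == 0x00000042 */
unsigned long long dcmd = MFC_CMD_DWORD(128, 5, cmd);   /* == 0x0080000500000042 */
/* The size is legal: MFC_MIN_DMA_SIZE (16) <= 128 <= MFC_MAX_DMA_SIZE (16384). */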


@@ -1,149 +1,149 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* spe_cache_defs.h
*
* Copyright (C) 2005 IBM Corp.
*
* Internal definitions for software managed cache.
*/
#ifndef __SPE_CACHE_DEFS_H__
#define __SPE_CACHE_DEFS_H__
/**
** Defn's for number of cache sets.
** Default is 64 sets.
*/
#if (SPE_CACHE_NSETS==1024)
#define SPE_CACHE_NSETS_SHIFT 10
#elif (SPE_CACHE_NSETS==512)
#define SPE_CACHE_NSETS_SHIFT 9
#elif (SPE_CACHE_NSETS==256)
#define SPE_CACHE_NSETS_SHIFT 8
#elif (SPE_CACHE_NSETS==128)
#define SPE_CACHE_NSETS_SHIFT 7
#elif (SPE_CACHE_NSETS==64)
#define SPE_CACHE_NSETS_SHIFT 6
#elif (SPE_CACHE_NSETS==32)
#define SPE_CACHE_NSETS_SHIFT 5
#elif (SPE_CACHE_NSETS==16)
#define SPE_CACHE_NSETS_SHIFT 4
#elif (SPE_CACHE_NSETS==8)
#define SPE_CACHE_NSETS_SHIFT 3
#elif (SPE_CACHE_NSETS==4)
#define SPE_CACHE_NSETS_SHIFT 2
#elif (SPE_CACHE_NSETS==2)
#define SPE_CACHE_NSETS_SHIFT 1
#else
#undef SPE_CACHE_NSETS
#define SPE_CACHE_NSETS 64
#define SPE_CACHE_NSETS_SHIFT 6
#endif
/**
** Defn's for cacheline size (bytes).
** Default is 128 bytes.
*/
#if (SPE_CACHELINE_SIZE==512)
#define SPE_CACHELINE_SHIFT 9
#elif (SPE_CACHELINE_SIZE==256)
#define SPE_CACHELINE_SHIFT 8
#elif (SPE_CACHELINE_SIZE==128)
#define SPE_CACHELINE_SHIFT 7
#elif (SPE_CACHELINE_SIZE==64)
#define SPE_CACHELINE_SHIFT 6
#elif (SPE_CACHELINE_SIZE==32)
#define SPE_CACHELINE_SHIFT 5
#else
#undef SPE_CACHELINE_SIZE
#define SPE_CACHELINE_SIZE 128
#define SPE_CACHELINE_SHIFT 7
#endif
/**
** Defn's derived from above settings.
*/
#define SPE_CACHE_NSETS_MASK (SPE_CACHE_NSETS - 1)
#define SPE_CACHELINE_MASK (SPE_CACHELINE_SIZE - 1)
/**
** Defn's for managing cacheline state.
*/
#define SPE_CACHELINE_DIRTY 0x1
#define SPE_CACHELINE_LOCKED 0x2
#define SPE_CACHELINE_STATE_MASK (SPE_CACHELINE_DIRTY | SPE_CACHELINE_LOCKED)
#ifdef _XLC
/**
* FIXME: For now disable manual branch hints
* on XLC due to performance degradation.
*/
#ifndef likely
#define likely(_c) (_c)
#define unlikely(_c) (_c)
#endif
#else /* !_XLC */
#ifndef likely
#define likely(_c) __builtin_expect((_c), 1)
#define unlikely(_c) __builtin_expect((_c), 0)
#endif
#endif
/**
** Debug controls. Set -DNDEBUG to
** disable both panic and assert.
*/
#include <assert.h>
#define _spe_cache_panic_(c) assert(c)
#ifdef SPE_CACHE_DBG
#define _spe_cache_assert_(c) assert(c)
#else
#define _spe_cache_assert_(c) /* No-op. */
#endif
#define _spe_cacheline_byte_offset_(ea) \
((ea) & SPE_CACHELINE_MASK)
#define _spe_cacheline_byte_offset_x4(ea) \
spu_and ((ea), SPE_CACHELINE_MASK)
#endif
static __inline vector unsigned int _load_vec_uint4(unsigned int ui1, unsigned int ui2, unsigned int ui3, unsigned int ui4)
{
vector unsigned int result;
vector unsigned int iv1, iv2, iv3, iv4;
vector unsigned char shuffle = VEC_LITERAL(vector unsigned char,
0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
iv1 = spu_promote(ui1, 0);
iv2 = spu_promote(ui2, 0);
iv3 = spu_promote(ui3, 0);
iv4 = spu_promote(ui4, 0);
result = spu_or(spu_shuffle(iv1, iv2, shuffle), spu_shuffle(iv3, iv4, spu_rlqwbyte(shuffle, 8)));
return (result);
}
static __inline vector unsigned int _pack_vec_uint4(vector unsigned int ui1, vector unsigned int ui2, vector unsigned int ui3, vector unsigned int ui4)
{
vector unsigned int result;
vector unsigned char shuffle = VEC_LITERAL(vector unsigned char,
0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
result = spu_or(spu_shuffle(ui1, ui2, shuffle), spu_shuffle(ui3, ui4, spu_rlqwbyte(shuffle, 8)));
return (result);
}
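For reference, _load_vec_uint4 above gathers its four scalars into one vector: the first shuffle merges arguments 1 and 2 into the upper words, the rotated pattern merges arguments 3 and 4 into the lower words, and spu_or joins the halves. A spot check (editor's note):

vector unsigned int v = _load_vec_uint4(1, 2, 3, 4);
/* v now holds {1, 2, 3, 4} in its four word slots. */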


@@ -1,40 +1,40 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* dma.h
*
* Copyright (C) 2005 IBM Corp.
*
* Internal DMA utilities for software
* managed cache.
*/
#ifndef __SPE_CACHE_DMA_H__
#define __SPE_CACHE_DMA_H__
#define SPE_CACHE_TAGID_SHIFT (SPE_CACHELINE_SHIFT + SPE_CACHE_NWAY_SHIFT)
#define _SPE_CACHELINE_TAGID(_ptr) (16)
#define _SPE_CACHELINE_TAGMASK(_ptr) (1 << 16)
#define SPE_CACHELINE_TAGID(_line) \
_SPE_CACHELINE_TAGID(&spe_cache_mem[_line])
#define SPE_CACHELINE_TAGMASK(_line) \
_SPE_CACHELINE_TAGMASK(&spe_cache_mem[_line])
#ifndef SPE_CACHE_SET_TAGID
#define SPE_CACHE_SET_TAGID(set) ((set) & 0x1f)
#endif
#define SPE_CACHE_SET_TAGMASK(set) (1 << SPE_CACHE_SET_TAGID(set))
#define SPE_CACHE_PUT MFC_PUTF_CMD
#define SPE_CACHE_GET MFC_GET_CMD
#endif
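Because SPE_CACHE_SET_TAGID is wrapped in #ifndef above, a build may override the set-to-tag mapping before including the cache headers. A hypothetical sketch (the default maps set n to MFC tag n & 0x1f):

#define SPE_CACHE_SET_TAGID(set) (31)   /* hypothetical: pin all cache DMA to tag 31 */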


@@ -1,35 +1,35 @@
/* @(#)12 1.5 src/lib/math/ilog2.h, sw.lib, sdk_pub 10/11/05 15:35:56 */
/* -------------------------------------------------------------- */
/* (C) Copyright 2001,2005, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment Incorporated, */
/* Toshiba Corporation. */
/* */
/* All Rights Reserved. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifndef _ILOG2_H_
#define _ILOG2_H_ 1
/*
* FUNCTION
* signed int _ilog2(signed int x)
*
* DESCRIPTION
* _ilog2 computes the ceiling of log (base 2) of the input value x.
* The input value, x, must be a non-zero positive value.
*/
static __inline signed int _ilog2(signed int x)
{
#ifdef __SPU__
return (32 - spu_extract(spu_cntlz(spu_promote(x - 1, 0)), 0));
#else
signed int result;
for (result=0, x--; x > 0; result++, x>>=1);
return (result);
#endif
}
#endif /* _ILOG2_H_ */
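Two spot checks of the ceiling behavior documented above (editor's example; build without -DNDEBUG so assert is live):

#include <assert.h>
static __inline void _ilog2_spot_check(void)
{
    assert(_ilog2(16) == 4);   /* exact power of two */
    assert(_ilog2(17) == 5);   /* rounds up to the next power */
}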


@@ -1,68 +1,68 @@
/* @(#)85 1.4 src/lib/c/memset.h, sw.lib, sdk_pub 10/13/05 10:17:09 */
/* -------------------------------------------------------------- */
/* (C) Copyright 2001,2005, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment Incorporated, */
/* Toshiba Corporation. */
/* */
/* All Rights Reserved. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#include <spu_intrinsics.h>
#include <stddef.h>
/* Fills the first n bytes of the memory area pointed to by s
* with the constant byte c. Returns a pointer to the memory area s.
*/
static __inline void * _memset(void *s, int c, size_t n)
{
int skip, cnt, i;
vec_uchar16 *vs;
vec_uchar16 vc, mask;
vs = (vec_uchar16 *)(s);
vc = spu_splats((unsigned char)c);
cnt = (int)(n);
/* Handle any leading partial quadwords as well as
 * very short settings (i.e., such that the n characters
 * all reside in a single quadword).
 */
skip = (int)(s) & 15;
if (skip) {
mask = spu_rlmaskqwbyte((vec_uchar16)(-1), 0-skip);
cnt -= 16 - skip;
if (cnt < 0) {
mask = spu_and(mask, spu_slqwbyte((vec_uchar16)(-1), (unsigned int)(-cnt)));
}
*vs = spu_sel(*vs, vc, mask);
vs++;
}
/* Handle 8 quadwords at a time
*/
for (i=127; i<cnt; cnt-=8*16) {
vs[0] = vc;
vs[1] = vc;
vs[2] = vc;
vs[3] = vc;
vs[4] = vc;
vs[5] = vc;
vs[6] = vc;
vs[7] = vc;
vs += 8;
}
/* Finish all remaining complete quadwords
*/
for (i=15; i<cnt; cnt-=16) *vs++ = vc;
/* Handle any trailing partial quadwords
*/
if (cnt > 0) {
mask = spu_slqwbyte((vec_uchar16)(-1), (unsigned int)(16-cnt));
*vs = spu_sel(*vs, vc, mask);
}
return (s);
}
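A usage sketch that exercises every phase of the loop structure above (editor's illustration; the deliberate 3-byte misalignment forces a leading partial quadword):

char buf[256] __attribute__((aligned(16)));
_memset(buf + 3, 0xAB, 200);   /* 13-byte leading partial, one 128-byte
                                * 8-quadword block, three full quadwords
                                * (48 bytes), 11-byte trailing partial */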


@@ -1,194 +1,194 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* nway-lookup.h
*
* Copyright (C) 2005 IBM Corp.
*
* Internal lookup operations for software
* managed cache.
*
* See nway-opt.h for "optimized" nway
* lookup operations.
*/
#ifndef __SPE_CACHE_NWAY_LOOKUP_H_
#define __SPE_CACHE_NWAY_LOOKUP_H_
/**
* _decl_set_entries_
* Load up set entries (by 4) from an n-way
* set associative cache. Mask off the dirty
* bit, as needed.
*/
#define _decl_set_entries_(set, name, index) \
vec_uint4 name = *((vec_uint4 *) &spe_cache_dir[set][index])
#define _spe_cache_4_way_lookup_(set, ea) \
({ \
_decl_set_entries_(set, e0123, 0); \
spu_gather(spu_cmpeq(e0123, ea)); \
})
/**
* _spe_cache_set_lookup_
* Compare 'ea' against all entries of
* a set, and return a result that is
* consistent with spu_gather().
*/
#define _spe_cache_set_lookup_(set, ea) \
_spe_cache_4_way_lookup_(set, ea)
/**
* _spe_cache_nway_lookup_x4_
* Declare local variables and lookup four addresses
* in the n-way set associative cache. Upon return,
* 'idx_x4' contains the matching elements in the sets,
* or -1 if not found.
*/
#define _spe_cache_nway_lookup_x4(ea_x4, set_x4, idx_x4) \
({ \
vector unsigned int ea_aligned_x4 = spu_and ((ea_x4), ~SPE_CACHELINE_MASK); \
vector unsigned char splat0 = VEC_LITERAL(vector unsigned char, \
0x00, 0x01, 0x02, 0x03, \
0x00, 0x01, 0x02, 0x03, \
0x00, 0x01, 0x02, 0x03, \
0x00, 0x01, 0x02, 0x03); \
vector unsigned char splat1 = VEC_LITERAL(vector unsigned char, \
0x04, 0x05, 0x06, 0x07, \
0x04, 0x05, 0x06, 0x07, \
0x04, 0x05, 0x06, 0x07, \
0x04, 0x05, 0x06, 0x07); \
vector unsigned char splat2 = VEC_LITERAL(vector unsigned char, \
0x08, 0x09, 0x0a, 0x0b, \
0x08, 0x09, 0x0a, 0x0b, \
0x08, 0x09, 0x0a, 0x0b, \
0x08, 0x09, 0x0a, 0x0b); \
vector unsigned char splat3 = VEC_LITERAL(vector unsigned char, \
0x0c, 0x0d, 0x0e, 0x0f, \
0x0c, 0x0d, 0x0e, 0x0f, \
0x0c, 0x0d, 0x0e, 0x0f, \
0x0c, 0x0d, 0x0e, 0x0f); \
vec_uint4 ea_aligned0 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat0); \
vec_uint4 ea_aligned1 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat1); \
vec_uint4 ea_aligned2 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat2); \
vec_uint4 ea_aligned3 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat3); \
vec_uint4 found0, found1, found2, found3; \
vec_uint4 found_x4; \
(set_x4) = _spe_cache_set_num_x4(ea_x4); \
found0 = _spe_cache_set_lookup_(spu_extract (set_x4, 0), ea_aligned0); \
found1 = _spe_cache_set_lookup_(spu_extract (set_x4, 1), ea_aligned1); \
found2 = _spe_cache_set_lookup_(spu_extract (set_x4, 2), ea_aligned2); \
found3 = _spe_cache_set_lookup_(spu_extract (set_x4, 3), ea_aligned3); \
found_x4 = _pack_vec_uint4 (found0, found1, found2, found3); \
(idx_x4) = (vector signed int)_spe_cache_idx_num_x4(found_x4); \
})
#define _spe_cache_nway_lookup_(ea, set, idx) \
({ \
unsigned int ea_aligned = (ea) & ~SPE_CACHELINE_MASK; \
vec_uint4 ea_aligned4 = spu_splats(ea_aligned); \
vec_uint4 found; \
(set) = _spe_cache_set_num_(ea); \
found = _spe_cache_set_lookup_(set, ea_aligned4); \
(idx) = _spe_cache_idx_num_(found); \
})
/**
* _spe_cache_lookup_
* Lookup and return the LSA of an EA
* that is known to be in the cache.
*/
#define _spe_cache_lookup_(ea, is_write) \
({ \
int set, idx, line, byte; \
_spe_cache_nway_lookup_(ea, set, idx); \
\
line = _spe_cacheline_num_(set, idx); \
byte = _spe_cacheline_byte_offset_(ea); \
(void *) &spe_cache_mem[line + byte]; \
})
/**
* _spe_cache_wait_
* Wait for transfer of a cache line
* to complete.
*/
#define _spe_cache_wait_(_lsa) \
({ \
spu_writech(22, _SPE_CACHELINE_TAGMASK(_lsa)); \
spu_mfcstat(MFC_TAG_UPDATE_ALL); \
})
/**
* _spe_cache_lookup_wait_
* Lookup and return the LSA of an EA
* that is known to be in the cache,
* and guarantee that its transfer is
* complete.
*/
#define _spe_cache_lookup_wait_(ea, is_write) \
({ \
int set, idx, line, byte; \
_spe_cache_nway_lookup_(ea, set, idx); \
\
line = _spe_cacheline_num_(set, idx); \
byte = _spe_cacheline_byte_offset_(ea); \
spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \
spu_mfcstat(MFC_TAG_UPDATE_ALL); \
(void *) &spe_cache_mem[line + byte]; \
})
/**
* _spe_cache_lookup_xfer_
* Lookup and return the LSA of an EA, where
* the line may either be in the cache or not.
* If not, initiate transfer but do not wait
* for completion.
*/
#define _spe_cache_lookup_xfer_(ea, is_write, rb) \
({ \
int set, idx, line, byte; \
_spe_cache_nway_lookup_(ea, set, idx); \
\
if (unlikely(idx < 0)) { \
idx = _spe_cache_miss_(ea, set, -1); \
} \
line = _spe_cacheline_num_(set, idx); \
byte = _spe_cacheline_byte_offset_(ea); \
(void *) &spe_cache_mem[line + byte]; \
})
/**
* _spe_cache_lookup_xfer_wait_
* Lookup and return the LSA of an EA, where
* the line may either be in the cache or not.
* If not, initiate transfer and guarantee
* completion.
*/
#define _spe_cache_lookup_xfer_wait_(ea, is_write, rb) \
({ \
int set, idx, line, byte; \
_spe_cache_nway_lookup_(ea, set, idx); \
\
if (unlikely(idx < 0)) { \
idx = _spe_cache_miss_(ea, set, -1); \
spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \
spu_mfcstat(MFC_TAG_UPDATE_ALL); \
} \
line = _spe_cacheline_num_(set, idx); \
byte = _spe_cacheline_byte_offset_(ea); \
(void *) &spe_cache_mem[line + byte]; \
})
#endif
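The split transfer/wait macros above are what api.h pairs as spe_cache_tr and spe_cache_wait; a hedged sketch of the intended software-pipelined pattern (editor's example):

static __inline int pipelined_read_int(unsigned int ea)
{
    int *lsa = (int *) spe_cache_tr(ea);  /* start the DMA; do not wait */
    /* ... independent computation overlaps the transfer here ... */
    spe_cache_wait(lsa);                  /* block until the line is present */
    return *lsa;
}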


@@ -1,51 +1,51 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* nway-miss.h
*
* Copyright (C) 2005 IBM Corp.
*
* Internal handler for cache misses.
*/
#ifndef __SPE_CACHE_NWAY_MISS_H__
#define __SPE_CACHE_NWAY_MISS_H__
static int _spe_cache_miss_(unsigned int ea, int set, int avail)
{
unsigned int ea_aligned = ea & ~SPE_CACHELINE_MASK;
vec_uint4 slot;
vec_uint4 exists = _spe_cache_set_lookup_(set, ea_aligned);
int idx, line;
/* Double check to make sure that the entry has not
* already been allocated in this set. This condition
* might occur if multiple lookups are being performed
* simultaneously.
*/
if (unlikely(spu_extract(exists, 0) != 0)) {
return _spe_cache_idx_num_(exists);
}
/* Now check to see if there are empty slots
* that are available in the set.
*/
slot = _spe_cache_replace_(set, avail);
idx = _spe_cache_idx_num_(slot);
line = _spe_cacheline_num_(set, idx);
spu_mfcdma32(&spe_cache_mem[line], ea_aligned, SPE_CACHELINE_SIZE,
SPE_CACHE_SET_TAGID(set), SPE_CACHE_GET);
spe_cache_dir[set][SPE_CACHE_NWAY_MASK - idx] = ea_aligned;
return idx;
}
#endif


@@ -1,153 +1,153 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* nway-opt.h
*
* Copyright (C) 2006 IBM Corp.
*
* "Optimized" lookup operations for n-way set associative
* software managed cache.
*/
#include <spu_intrinsics.h>
#ifndef __SPE_CACHE_NWAY_OPT_H_
#define __SPE_CACHE_NWAY_OPT_H_
/* __spe_cache_rd
* Look up and return data from the cache. If the data
* is not currently in cache then transfer it from main
* storage.
*
* This code uses a conditional branch to the cache miss
* handler in the event that the requested data is not
* in the cache. A branch hint is used to avoid paying
* the branch stall penalty.
*/
#define __spe_cache_rd(type, ea) \
({ \
int set, idx, lnum, byte; \
type ret; \
_spe_cache_nway_lookup_(ea, set, idx); \
if (unlikely(idx < 0)) { \
idx = _spe_cache_miss_(ea, set, -1); \
spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \
spu_mfcstat(MFC_TAG_UPDATE_ALL); \
} \
lnum = _spe_cacheline_num_(set, idx); \
byte = _spe_cacheline_byte_offset_(ea); \
ret = *((type *) (&spe_cache_mem[lnum + byte])); \
ret; \
})
/**
* __spe_cache_rd_x4
* Fetch four data elements from the cache.
*
* This code uses one conditional branch in
* the event that any of the four elements
* are missing.
*
* On a miss, light weight locking is used to
* avoid casting out entries that were found.
* Further, we wait just once for the transfers,
* allowing for parallel [rather than serial]
* transfers.
*/
#define __spe_cache_rd_x4(type, ea_x4) \
({ \
vector unsigned int missing; \
unsigned int ms; \
vector unsigned int cindex; \
unsigned int d0, d1, d2, d3; \
vector unsigned int s_x4; \
vector signed int i_x4; \
vector unsigned int ibyte, iline; \
vector unsigned int ret; \
unsigned int idx0, idx1, idx2, idx3; \
\
_spe_cache_nway_lookup_x4(ea_x4, s_x4, i_x4); \
missing = spu_rlmask ((vector unsigned int)i_x4, -8); \
ms = spu_extract (spu_gather (missing), 0); \
\
ibyte = _spe_cacheline_byte_offset_x4(ea_x4); \
\
iline = _spe_cacheline_num_x4(s_x4, \
(vector unsigned int)i_x4); \
\
cindex = spu_add (iline, ibyte); \
\
idx0 = spu_extract (cindex, 0); \
idx1 = spu_extract (cindex, 1); \
idx2 = spu_extract (cindex, 2); \
idx3 = spu_extract (cindex, 3); \
\
d0 = *((type *) (&spe_cache_mem[idx0])); \
d1 = *((type *) (&spe_cache_mem[idx1])); \
d2 = *((type *) (&spe_cache_mem[idx2])); \
d3 = *((type *) (&spe_cache_mem[idx3])); \
\
ret = _load_vec_uint4 (d0, d1, d2, d3); \
\
if (unlikely(ms)) { \
int b0 = spu_extract (ibyte, 0); \
int b1 = spu_extract (ibyte, 1); \
int b2 = spu_extract (ibyte, 2); \
int b3 = spu_extract (ibyte, 3); \
int lnum0; \
int lnum1; \
int lnum2; \
int lnum3; \
int s0 = spu_extract (s_x4, 0); \
int s1 = spu_extract (s_x4, 1); \
int s2 = spu_extract (s_x4, 2); \
int s3 = spu_extract (s_x4, 3); \
int i0 = spu_extract (i_x4, 0); \
int i1 = spu_extract (i_x4, 1); \
int i2 = spu_extract (i_x4, 2); \
int i3 = spu_extract (i_x4, 3); \
unsigned int ea0 = spu_extract(ea_x4, 0); \
unsigned int ea1 = spu_extract(ea_x4, 1); \
unsigned int ea2 = spu_extract(ea_x4, 2); \
unsigned int ea3 = spu_extract(ea_x4, 3); \
int avail = -1; \
\
avail &= ~(((i0 < 0) ? 0 : (1 << i0)) | \
((i1 < 0) ? 0 : (1 << i1)) | \
((i2 < 0) ? 0 : (1 << i2)) | \
((i3 < 0) ? 0 : (1 << i3))); \
\
i0 = _spe_cache_miss_(ea0, s0, avail); \
avail &= ~(1 << i0); \
i1 = _spe_cache_miss_(ea1, s1, avail); \
avail &= ~(1 << i1); \
i2 = _spe_cache_miss_(ea2, s2, avail); \
avail &= ~(1 << i2); \
i3 = _spe_cache_miss_(ea3, s3, avail); \
\
lnum0 = _spe_cacheline_num_(s0, i0); \
lnum1 = _spe_cacheline_num_(s1, i1); \
lnum2 = _spe_cacheline_num_(s2, i2); \
lnum3 = _spe_cacheline_num_(s3, i3); \
\
/* Wait on the tag groups of all four sets ('set' was undefined \
 * here; OR-ing the four per-set masks is the assumed fix). */ \
spu_writech(22, SPE_CACHE_SET_TAGMASK(s0) | \
                SPE_CACHE_SET_TAGMASK(s1) | \
                SPE_CACHE_SET_TAGMASK(s2) | \
                SPE_CACHE_SET_TAGMASK(s3)); \
spu_mfcstat(MFC_TAG_UPDATE_ALL); \
\
d0 = *((type *) (&spe_cache_mem[lnum0 + b0])); \
d1 = *((type *) (&spe_cache_mem[lnum1 + b1])); \
d2 = *((type *) (&spe_cache_mem[lnum2 + b2])); \
d3 = *((type *) (&spe_cache_mem[lnum3 + b3])); \
\
ret = _load_vec_uint4 (d0, d1, d2, d3); \
} \
ret; \
})
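/* Usage sketch (hypothetical): gather four elements whose effective
 * addresses are packed into one vector.
 *
 *     vector unsigned int ea4 = ...;  // four 32-bit EAs
 *     vector unsigned int v = __spe_cache_rd_x4(unsigned int, ea4);
 *
 * Found entries are read directly; any misses are filled and then
 * waited on once, so up to four DMA transfers proceed in parallel.
 */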
#endif /* __SPE_CACHE_NWAY_OPT_H_ */

/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* nway-replace.h
*
* Copyright (C) 2005 IBM Corp.
*
 * Implements round-robin way replacement
 * for the software-managed cache.
*/
#ifndef __SPE_CACHE_NWAY_REPLACE_H_
#define __SPE_CACHE_NWAY_REPLACE_H_
static vec_uint4 spe_cache_replace_cntr[SPE_CACHE_NSETS+1];
static inline vec_uint4 _spe_cache_replace_(int set, int avail)
{
unsigned int mask = ((1 << SPE_CACHE_NWAY) - 1) & avail;
unsigned int curr, currbit, next;
/* Advance this set's round-robin counter by one way per call. */
curr = spu_extract(spe_cache_replace_cntr[set], 0) & SPE_CACHE_NWAY_MASK;
currbit = (1 << curr);
next = (curr + 1) & SPE_CACHE_NWAY_MASK;
spe_cache_replace_cntr[set] = (vec_uint4) spu_promote(next, 0);
/* Prefer the round-robin victim if it is still available; otherwise
 * return the full mask of available ways. */
mask = (mask & currbit) ? currbit : mask;
return (vec_uint4) spu_promote(mask, 0);
}
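/* Illustration (assuming the default 4-way configuration): if
 * avail = 0b1101 and the round-robin counter points at way 1, then
 * currbit = 0b0010 is not available, so the full mask 0b1101 is
 * returned and _spe_cache_idx_num_() selects way 3, the highest
 * available way. Had the counter pointed at way 2, currbit = 0b0100
 * is available and way 2 would be chosen directly.
 */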
#endif

/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* nway.h
*
* Copyright (C) 2005 IBM Corp.
*
* Support for n-way set associative software
* managed cache. The 4-way associative cache
* is the only interface exposed currently.
*/
#ifndef __SPE_CACHE_NWAY_H_
#define __SPE_CACHE_NWAY_H_
/**
 ** Definitions for n-way set associativity.
 ** Default is 4-way.
 */
#define SPE_CACHE_NWAY 4
#define SPE_CACHE_NWAY_SHIFT 2
#define SPE_CACHE_NWAY_MASK (SPE_CACHE_NWAY - 1)
#define SPE_CACHE_NENTRIES (SPE_CACHE_NWAY * SPE_CACHE_NSETS)
#define SPE_CACHE_MEM_SIZE (SPE_CACHE_NENTRIES * SPE_CACHELINE_SIZE)
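/* Worked example (SPE_CACHE_NSETS and SPE_CACHELINE_SIZE come from
 * defs.h; the values below are assumed for illustration): with 64 sets
 * and 128-byte lines, SPE_CACHE_NENTRIES = 4 * 64 = 256 and
 * SPE_CACHE_MEM_SIZE = 256 * 128 = 32 KB of local store.
 */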
#define _spe_cache_set_num_(ea) \
({ \
unsigned int ead, eadm, ret; \
ead = ((ea) >> SPE_CACHELINE_SHIFT); \
eadm = ((ea) >> (SPE_CACHELINE_SHIFT+2)); \
ret = (ead ^ eadm) & SPE_CACHE_NSETS_MASK; \
ret; \
})
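/* Worked example (assuming SPE_CACHELINE_SHIFT == 7 and
 * SPE_CACHE_NSETS_MASK == 63 in defs.h): for ea = 0x00012380,
 * ead = 0x247, eadm = 0x91, and (0x247 ^ 0x91) & 63 = 22, so the
 * address maps to set 22. XOR-folding two shifted copies of the
 * address spreads nearby lines across the sets.
 */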
#define _spe_cache_set_num_x4(ea_x4) \
({ \
vector unsigned int tmp0; \
vector unsigned int tmp1; \
tmp0 = spu_rlmask (ea_x4, -SPE_CACHELINE_SHIFT); \
/* Shift by SPE_CACHELINE_SHIFT+2 so this hash matches the scalar \
 * _spe_cache_set_num_() above (the original used +1, which mapped \
 * the same ea to a different set on the vector path). */ \
tmp1 = spu_rlmask (ea_x4, -(SPE_CACHELINE_SHIFT+2)); \
spu_and (spu_xor (tmp0, tmp1), SPE_CACHE_NSETS_MASK); \
})
#define _spe_cache_idx_num_x4(found) \
spu_sub((unsigned int) 31, spu_cntlz(found))
#define _spe_cache_idx_num_(found) \
spu_extract(spu_sub((unsigned int) 31, spu_cntlz(found)), 0)
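/* Example: a one-hot 'found' mask of 0b0100 yields
 * 31 - cntlz(0b0100) = 31 - 29 = 2, i.e. way 2. */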
#define _spe_cacheline_num_(set, idx) \
(((set << SPE_CACHE_NWAY_SHIFT) + idx) << SPE_CACHELINE_SHIFT)
#define _spe_cacheline_num_x4(set, idx) \
spu_sl (spu_add (spu_sl (set, SPE_CACHE_NWAY_SHIFT), idx), SPE_CACHELINE_SHIFT)
#define _spe_cacheline_is_dirty_(set, idx) \
(spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] & SPE_CACHELINE_DIRTY)
#define _spe_cacheline_is_locked_(set, idx) \
(spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] & SPE_CACHELINE_LOCKED)
#define _spe_lock_cacheline_(set, idx) \
spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] |= SPE_CACHELINE_LOCKED
#define _spe_unlock_cacheline_(set, idx) \
spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] &= ~SPE_CACHELINE_LOCKED
/**
 * spe_cache_dir
 * This is the n-way set associative cache
 * directory. Entries are either zero (unused)
 * or non-zero (used). Way 'idx' of a set is
 * stored at column SPE_CACHE_NWAY_MASK - idx,
 * i.e. ways are kept in reverse order.
 *
 * State for one additional (dummy) set is
 * allocated to improve efficiency of cache
 * line locking.
 *
 * volatile appears unnecessary here; the SCE toolchain
 * guarantees a barrier after each DMA transfer.
 */
static unsigned int spe_cache_dir[SPE_CACHE_NSETS+1][SPE_CACHE_NWAY]
__attribute__ ((aligned(16)));
/**
 * spe_cache_mem
 * A contiguous set of cache lines in LS memory,
 * one line for each entry in the cache.
 *
 * volatile appears unnecessary here; the SCE toolchain
 * guarantees a barrier after each DMA transfer.
 */
static char spe_cache_mem[SPE_CACHE_MEM_SIZE]
__attribute__ ((aligned(128)));
#include "nway-lookup.h"
#include "nway-replace.h"
#include "nway-miss.h"
#include "nway-opt.h"
#endif

/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* spe_cache.h
*
* Copyright (C) 2005 IBM Corp.
*
 * Top-level include file for the
 * software-managed cache.
*/
#ifndef __SPE_CACHE_H__
#define __SPE_CACHE_H__ 1
#include "vec_literal.h"
#include "ilog2.h"
#include "memset.h"
//#include <cbe_mfc.h>
#include "defs.h"
#include "dma.h"
#include "nway.h"
#include "api.h"
#endif

/* @(#)86 1.3 src/include/vec_literal.h, sw.includes, sdk_pub 10/11/05 16:00:27 */
/* -------------------------------------------------------------- */
/* (C) Copyright 2001,2005, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment Incorporated, */
/* Toshiba Corporation. */
/* */
/* All Rights Reserved. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifndef _VEC_LITERAL_H_
#define _VEC_LITERAL_H_
/* This header file provides an abstraction for the various implementations
 * of vector literal construction. The two formats are:
 *
 * 1) Altivec style, using parentheses
 * 2) C grammar-friendly style, using curly braces
 *
 * The macro VEC_LITERAL has been developed to provide some portability
 * between these two styles. To achieve true portability, the user must
 * specify all elements of the vector being initialized. A single element
 * can be provided, but only the first element is guaranteed across both
 * construction styles.
 *
 * The VEC_SPLAT_* macros have been provided for portability of vector
 * literal construction when all the elements of the vector contain the
 * same value.
 */
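/* Example (a minimal sketch): with the curly-brace style defined below,
 * these expand to compound literals:
 *
 *     vector unsigned int v = VEC_LITERAL(vector unsigned int, 1, 2, 3, 4);
 *     vector float ones     = VEC_SPLAT_F32(1.0f);
 *
 * i.e. ((vector unsigned int){1, 2, 3, 4}) and
 * ((vector float){1.0f, 1.0f, 1.0f, 1.0f}).
 */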
#ifdef __SPU__
#include <spu_intrinsics.h>
#endif
#ifdef __ALTIVEC_LITERAL_STYLE__
/* Use altivec style.
*/
#define VEC_LITERAL(_type, ...) ((_type)(__VA_ARGS__))
#define VEC_SPLAT_U8(_val) ((vector unsigned char)(_val))
#define VEC_SPLAT_S8(_val) ((vector signed char)(_val))
#define VEC_SPLAT_U16(_val) ((vector unsigned short)(_val))
#define VEC_SPLAT_S16(_val) ((vector signed short)(_val))
#define VEC_SPLAT_U32(_val) ((vector unsigned int)(_val))
#define VEC_SPLAT_S32(_val) ((vector signed int)(_val))
#define VEC_SPLAT_F32(_val) ((vector float)(_val))
#define VEC_SPLAT_U64(_val) ((vector unsigned long long)(_val))
#define VEC_SPLAT_S64(_val) ((vector signed long long)(_val))
#define VEC_SPLAT_F64(_val) ((vector double)(_val))
#else
/* Use curly brace style.
*/
#define VEC_LITERAL(_type, ...) ((_type){__VA_ARGS__})
#define VEC_SPLAT_U8(_val) ((vector unsigned char){_val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val})
#define VEC_SPLAT_S8(_val) ((vector signed char){_val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val})
#define VEC_SPLAT_U16(_val) ((vector unsigned short){_val, _val, _val, _val, _val, _val, _val, _val})
#define VEC_SPLAT_S16(_val) ((vector signed short){_val, _val, _val, _val, _val, _val, _val, _val})
#define VEC_SPLAT_U32(_val) ((vector unsigned int){_val, _val, _val, _val})
#define VEC_SPLAT_S32(_val) ((vector signed int){_val, _val, _val, _val})
#define VEC_SPLAT_F32(_val) ((vector float){_val, _val, _val, _val})
#define VEC_SPLAT_U64(_val) ((vector unsigned long long){_val, _val})
#define VEC_SPLAT_S64(_val) ((vector signed long long){_val, _val})
#define VEC_SPLAT_F64(_val) ((vector double){_val, _val})
#endif
#endif /* _VEC_LITERAL_H_ */