diff --git a/Extras/software_cache/Makefile b/Extras/software_cache/Makefile
new file mode 100644
index 000000000..6d1bbc505
--- /dev/null
+++ b/Extras/software_cache/Makefile
@@ -0,0 +1,14 @@
+# SCE CONFIDENTIAL
+# PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
+# Copyright (C) 2005 Sony Computer Entertainment Inc.
+# All Rights Reserved.
+#
+# CELL_MK_DIR is only a default (?=): a value from the environment or command line takes precedence.
+CELL_MK_DIR ?= $(CELL_SDK)/samples/mk
+include $(CELL_MK_DIR)/sdk.makedef.mk
+# Sub-makefiles for this directory; presumably read by sdk.target.mk below -- confirm.
+MK_TARGET = spu_thr_printf.ppu.mk hello.spu.mk
+# NOTE(review): neither sub-makefile named above is visible in this part of the change -- confirm both exist next to this Makefile.
+include $(CELL_MK_DIR)/sdk.target.mk
+
+
diff --git a/Extras/software_cache/SPU_printf/SPU_printf.vcproj b/Extras/software_cache/SPU_printf/SPU_printf.vcproj
new file mode 100644
index 000000000..072afd444
--- /dev/null
+++ b/Extras/software_cache/SPU_printf/SPU_printf.vcproj
@@ -0,0 +1,239 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Extras/software_cache/cache/CommonPublicLicense-1.0 b/Extras/software_cache/cache/CommonPublicLicense-1.0
new file mode 100644
index 000000000..5723258b5
--- /dev/null
+++ b/Extras/software_cache/cache/CommonPublicLicense-1.0
@@ -0,0 +1,213 @@
+Common Public License Version 1.0
+
+THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC
+LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
+CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
+
+1. DEFINITIONS
+
+"Contribution" means:
+
+ a) in the case of the initial Contributor, the initial code and
+documentation distributed under this Agreement, and
+
+ b) in the case of each subsequent Contributor:
+
+ i) changes to the Program, and
+
+ ii) additions to the Program;
+
+ where such changes and/or additions to the Program originate from and are
+distributed by that particular Contributor. A Contribution 'originates' from a
+Contributor if it was added to the Program by such Contributor itself or anyone
+acting on such Contributor's behalf. Contributions do not include additions to
+the Program which: (i) are separate modules of software distributed in
+conjunction with the Program under their own license agreement, and (ii) are not
+derivative works of the Program.
+
+"Contributor" means any person or entity that distributes the Program.
+
+"Licensed Patents " mean patent claims licensable by a Contributor which are
+necessarily infringed by the use or sale of its Contribution alone or when
+combined with the Program.
+
+"Program" means the Contributions distributed in accordance with this Agreement.
+
+"Recipient" means anyone who receives the Program under this Agreement,
+including all Contributors.
+
+2. GRANT OF RIGHTS
+
+ a) Subject to the terms of this Agreement, each Contributor hereby grants
+Recipient a non-exclusive, worldwide, royalty-free copyright license to
+reproduce, prepare derivative works of, publicly display, publicly perform,
+distribute and sublicense the Contribution of such Contributor, if any, and such
+derivative works, in source code and object code form.
+
+ b) Subject to the terms of this Agreement, each Contributor hereby grants
+Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed
+Patents to make, use, sell, offer to sell, import and otherwise transfer the
+Contribution of such Contributor, if any, in source code and object code form.
+This patent license shall apply to the combination of the Contribution and the
+Program if, at the time the Contribution is added by the Contributor, such
+addition of the Contribution causes such combination to be covered by the
+Licensed Patents. The patent license shall not apply to any other combinations
+which include the Contribution. No hardware per se is licensed hereunder.
+
+ c) Recipient understands that although each Contributor grants the licenses
+to its Contributions set forth herein, no assurances are provided by any
+Contributor that the Program does not infringe the patent or other intellectual
+property rights of any other entity. Each Contributor disclaims any liability to
+Recipient for claims brought by any other entity based on infringement of
+intellectual property rights or otherwise. As a condition to exercising the
+rights and licenses granted hereunder, each Recipient hereby assumes sole
+responsibility to secure any other intellectual property rights needed, if any.
+For example, if a third party patent license is required to allow Recipient to
+distribute the Program, it is Recipient's responsibility to acquire that license
+before distributing the Program.
+
+ d) Each Contributor represents that to its knowledge it has sufficient
+copyright rights in its Contribution, if any, to grant the copyright license set
+forth in this Agreement.
+
+3. REQUIREMENTS
+
+A Contributor may choose to distribute the Program in object code form under its
+own license agreement, provided that:
+
+ a) it complies with the terms and conditions of this Agreement; and
+
+ b) its license agreement:
+
+ i) effectively disclaims on behalf of all Contributors all warranties and
+conditions, express and implied, including warranties or conditions of title and
+non-infringement, and implied warranties or conditions of merchantability and
+fitness for a particular purpose;
+
+ ii) effectively excludes on behalf of all Contributors all liability for
+damages, including direct, indirect, special, incidental and consequential
+damages, such as lost profits;
+
+ iii) states that any provisions which differ from this Agreement are offered
+by that Contributor alone and not by any other party; and
+
+ iv) states that source code for the Program is available from such
+Contributor, and informs licensees how to obtain it in a reasonable manner on or
+through a medium customarily used for software exchange.
+
+When the Program is made available in source code form:
+
+ a) it must be made available under this Agreement; and
+
+ b) a copy of this Agreement must be included with each copy of the Program.
+
+Contributors may not remove or alter any copyright notices contained within the
+Program.
+
+Each Contributor must identify itself as the originator of its Contribution, if
+any, in a manner that reasonably allows subsequent Recipients to identify the
+originator of the Contribution.
+
+4. COMMERCIAL DISTRIBUTION
+
+Commercial distributors of software may accept certain responsibilities with
+respect to end users, business partners and the like. While this license is
+intended to facilitate the commercial use of the Program, the Contributor who
+includes the Program in a commercial product offering should do so in a manner
+which does not create potential liability for other Contributors. Therefore, if
+a Contributor includes the Program in a commercial product offering, such
+Contributor ("Commercial Contributor") hereby agrees to defend and indemnify
+every other Contributor ("Indemnified Contributor") against any losses, damages
+and costs (collectively "Losses") arising from claims, lawsuits and other legal
+actions brought by a third party against the Indemnified Contributor to the
+extent caused by the acts or omissions of such Commercial Contributor in
+connection with its distribution of the Program in a commercial product
+offering. The obligations in this section do not apply to any claims or Losses
+relating to any actual or alleged intellectual property infringement. In order
+to qualify, an Indemnified Contributor must: a) promptly notify the Commercial
+Contributor in writing of such claim, and b) allow the Commercial Contributor to
+control, and cooperate with the Commercial Contributor in, the defense and any
+related settlement negotiations. The Indemnified Contributor may participate in
+any such claim at its own expense.
+
+For example, a Contributor might include the Program in a commercial product
+offering, Product X. That Contributor is then a Commercial Contributor. If that
+Commercial Contributor then makes performance claims, or offers warranties
+related to Product X, those performance claims and warranties are such
+Commercial Contributor's responsibility alone. Under this section, the
+Commercial Contributor would have to defend claims against the other
+Contributors related to those performance claims and warranties, and if a court
+requires any other Contributor to pay any damages as a result, the Commercial
+Contributor must pay those damages.
+
+5. NO WARRANTY
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR
+IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE,
+NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each
+Recipient is solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its exercise of
+rights under this Agreement, including but not limited to the risks and costs of
+program errors, compliance with applicable laws, damage to or loss of data,
+programs or equipment, and unavailability or interruption of operations.
+
+6. DISCLAIMER OF LIABILITY
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
+CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST
+PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS
+GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+7. GENERAL
+
+If any provision of this Agreement is invalid or unenforceable under applicable
+law, it shall not affect the validity or enforceability of the remainder of the
+terms of this Agreement, and without further action by the parties hereto, such
+provision shall be reformed to the minimum extent necessary to make such
+provision valid and enforceable.
+
+If Recipient institutes patent litigation against a Contributor with respect to
+a patent applicable to software (including a cross-claim or counterclaim in a
+lawsuit), then any patent licenses granted by that Contributor to such Recipient
+under this Agreement shall terminate as of the date such litigation is filed. In
+addition, if Recipient institutes patent litigation against any entity
+(including a cross-claim or counterclaim in a lawsuit) alleging that the Program
+itself (excluding combinations of the Program with other software or hardware)
+infringes such Recipient's patent(s), then such Recipient's rights granted under
+Section 2(b) shall terminate as of the date such litigation is filed.
+
+All Recipient's rights under this Agreement shall terminate if it fails to
+comply with any of the material terms or conditions of this Agreement and does
+not cure such failure in a reasonable period of time after becoming aware of
+such noncompliance. If all Recipient's rights under this Agreement terminate,
+Recipient agrees to cease use and distribution of the Program as soon as
+reasonably practicable. However, Recipient's obligations under this Agreement
+and any licenses granted by Recipient relating to the Program shall continue and
+survive.
+
+Everyone is permitted to copy and distribute copies of this Agreement, but in
+order to avoid inconsistency the Agreement is copyrighted and may only be
+modified in the following manner. The Agreement Steward reserves the right to
+publish new versions (including revisions) of this Agreement from time to time.
+No one other than the Agreement Steward has the right to modify this Agreement.
+IBM is the initial Agreement Steward. IBM may assign the responsibility to serve
+as the Agreement Steward to a suitable separate entity. Each new version of the
+Agreement will be given a distinguishing version number. The Program (including
+Contributions) may always be distributed subject to the version of the Agreement
+under which it was received. In addition, after a new version of the Agreement
+is published, Contributor may elect to distribute the Program (including its
+Contributions) under the new version. Except as expressly stated in Sections
+2(a) and 2(b) above, Recipient receives no rights or licenses to the
+intellectual property of any Contributor under this Agreement, whether
+expressly, by implication, estoppel or otherwise. All rights in the Program not
+expressly granted under this Agreement are reserved.
+
+This Agreement is governed by the laws of the State of New York and the
+intellectual property laws of the United States of America. No party to this
+Agreement will bring a legal action under this Agreement more than one year
+after the cause of action arose. Each party waives its rights to a jury trial in
+any resulting litigation.
diff --git a/Extras/software_cache/cache/include/Makefile b/Extras/software_cache/cache/include/Makefile
new file mode 100644
index 000000000..27a5c4347
--- /dev/null
+++ b/Extras/software_cache/cache/include/Makefile
@@ -0,0 +1,28 @@
+# ---------------------------------------------------------------
+# PLEASE DO NOT MODIFY THIS SECTION
+# This prolog section is automatically generated.
+#
+# (C) Copyright 2001,2006,
+# International Business Machines Corporation,
+#
+# All Rights Reserved.
+# ---------------------------------------------------------------
+# PROLOG END TAG zYx
+
+########################################################################
+# Common Makefile
+########################################################################
+# Install destination; SDKINC_spu is not set in this file (presumably supplied by the build environment -- confirm).
+INSTALL_DIR = $(SDKINC_spu)/cache
+# Headers to install; presumably consumed by make.footer, included at the end -- confirm.
+INSTALL_FILES := api.h \
+ defs.h \
+ dma.h \
+ nway.h \
+ nway-lookup.h \
+ nway-miss.h \
+ nway-opt.h \
+ nway-replace.h \
+ spe_cache.h
+# NOTE(review): cbe_mfc.h, ilog2.h and memset.h are added to this directory by the same change but are not listed above -- confirm that is intentional.
+include ../../../../../make.footer
diff --git a/Extras/software_cache/cache/include/README b/Extras/software_cache/cache/include/README
new file mode 100644
index 000000000..b0e532433
--- /dev/null
+++ b/Extras/software_cache/cache/include/README
@@ -0,0 +1,32 @@
+%% ---------------------------------------------------------------
+%% PLEASE DO NOT MODIFY THIS SECTION
+%% This prolog section is automatically generated.
+%%
+%% (C) Copyright 2001,2006,
+%% International Business Machines Corporation,
+%%
+%% All Rights Reserved.
+%% ---------------------------------------------------------------
+%% PROLOG END TAG zYx
+This directory contains an implementation of a software-managed cache for
+the SPE. Whenever possible, the cache interfaces are implemented as macros
+or inline-able functions.
+
+Depending on compile-time settings, different cache implementations can
+be selected.
+
+The include file hierarchy is:
+
+ + spe_cache.h Top level header.
+ |
+ + defs.h Common definitions.
+ + dma.h Initiate DMA transfers.
+ + nway.h Top level n-way header.
+ |
+ + nway-lookup.h n-way lookup operations.
+ + nway-miss.h n-way cache miss handler.
+ + nway-replace.h n-way cache replace handler.
+ + nway-opt.h "optimized" n-way interfaces.
+ |
+ + api.h Basic application interfaces.
+
diff --git a/Extras/software_cache/cache/include/api.h b/Extras/software_cache/cache/include/api.h
new file mode 100644
index 000000000..77594221b
--- /dev/null
+++ b/Extras/software_cache/cache/include/api.h
@@ -0,0 +1,31 @@
+/* --------------------------------------------------------------- */
+/* PLEASE DO NOT MODIFY THIS SECTION */
+/* This prolog section is automatically generated. */
+/* */
+/* (C) Copyright 2001,2006, */
+/* International Business Machines Corporation, */
+/* */
+/* All Rights Reserved. */
+/* --------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+/* api.h
+ *
+ * Copyright (C) 2005 IBM Corp.
+ *
+ * Simple API for software managed cache on SPEs.
+ * A sophisticated application would not use these,
+ * but rather use the low-level lookup functions.
+ */
+
+#ifndef __SPE_CACHE_API_H__
+#define __SPE_CACHE_API_H__
+
+typedef void *spe_cache_entry_t;
+/* Each wrapper below forwards to a low-level macro that is defined outside this header. */
+#define spe_cache_rd(ea) _spe_cache_lookup_xfer_wait_(ea, 0, 1) /* by name: lookup + transfer + wait */
+#define spe_cache_tr(ea) _spe_cache_lookup_xfer_(ea, 0, 1) /* by name: lookup + transfer, presumably without waiting -- confirm */
+#define spe_cache_lr(ea) _spe_cache_lookup_(ea, 0) /* by name: lookup only */
+/* NOTE(review): the meaning of the literal 0 and 1 arguments above is not visible here -- confirm against the low-level macros. */
+#define spe_cache_wait(entry) _spe_cache_wait_(entry)
+
+#endif /* __SPE_CACHE_API_H__ */
diff --git a/Extras/software_cache/cache/include/cbe_mfc.h b/Extras/software_cache/cache/include/cbe_mfc.h
new file mode 100644
index 000000000..c685118ac
--- /dev/null
+++ b/Extras/software_cache/cache/include/cbe_mfc.h
@@ -0,0 +1,245 @@
+/* @(#)17 1.4 src/include/cbe_mfc.h, sw.includes, sdk_pub 10/11/05 16:00:25 */
+/* -------------------------------------------------------------- */
+/* (C) Copyright 2001,2005, */
+/* International Business Machines Corporation, */
+/* Sony Computer Entertainment Incorporated, */
+/* Toshiba Corporation. */
+/* */
+/* All Rights Reserved. */
+/* -------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+#ifndef _CBEA_MFC_H_
+#define _CBEA_MFC_H_
+
+/* This header file contains various definitions related to the Memory Flow
+ * Controller (MFC) portion of the Cell Broadband Engine Architecture (CBEA).
+ */
+
+/**************************************/
+/* MFC DMA Command Opcode Definitions */
+/**************************************/
+
+/****************************************************************************/
+/* MFC DMA Command flags which identify classes of operations. */
+/****************************************************************************/
+/* Note: These flags may be used in conjunction with the base command types
+ * (i.e. MFC_PUT_CMD, MFC_PUTR_CMD, MFC_GET_CMD, and MFC_SNDSIG_CMD)
+ * to construct the various command permutations.
+ */
+
+#define MFC_BARRIER_ENABLE 0x01
+#define MFC_FENCE_ENABLE 0x02
+#define MFC_LIST_ENABLE 0x04 /* SPU Only */
+#define MFC_START_ENABLE 0x08 /* proxy Only */
+#define MFC_RESULT_ENABLE 0x10
+
+/****************************************************************************/
+/* MFC DMA Put Commands */
+/****************************************************************************/
+
+#define MFC_PUT_CMD 0x20
+#define MFC_PUTS_CMD 0x28 /* proxy Only */
+#define MFC_PUTR_CMD 0x30
+#define MFC_PUTF_CMD 0x22
+#define MFC_PUTB_CMD 0x21
+#define MFC_PUTFS_CMD 0x2A /* proxy Only */
+#define MFC_PUTBS_CMD 0x29 /* proxy Only */
+#define MFC_PUTRF_CMD 0x32
+#define MFC_PUTRB_CMD 0x31
+#define MFC_PUTL_CMD 0x24 /* SPU Only */
+#define MFC_PUTRL_CMD 0x34 /* SPU Only */
+#define MFC_PUTLF_CMD 0x26 /* SPU Only */
+#define MFC_PUTLB_CMD 0x25 /* SPU Only */
+#define MFC_PUTRLF_CMD 0x36 /* SPU Only */
+#define MFC_PUTRLB_CMD 0x35 /* SPU Only */
+
+/****************************************************************************/
+/* MFC DMA Get Commands */
+/****************************************************************************/
+
+#define MFC_GET_CMD 0x40
+#define MFC_GETS_CMD 0x48 /* proxy Only */
+#define MFC_GETF_CMD 0x42
+#define MFC_GETB_CMD 0x41
+#define MFC_GETFS_CMD 0x4A /* proxy Only */
+#define MFC_GETBS_CMD 0x49 /* proxy Only */
+#define MFC_GETL_CMD 0x44 /* SPU Only */
+#define MFC_GETLF_CMD 0x46 /* SPU Only */
+#define MFC_GETLB_CMD 0x45 /* SPU Only */
+
+/****************************************************************************/
+/* MFC DMA Storage Control Commands */
+/****************************************************************************/
+/* Note: These are only supported on implementations with a SL1 cache
+ * They are no-ops on the initial (CBE) implementation.
+ */
+
+#define MFC_SDCRT_CMD 0x80
+#define MFC_SDCRTST_CMD 0x81
+#define MFC_SDCRZ_CMD 0x89
+#define MFC_SDCRS_CMD 0x8D
+#define MFC_SDCRF_CMD 0x8F
+
+/****************************************************************************/
+/* MFC Synchronization Commands */
+/****************************************************************************/
+
+#define MFC_GETLLAR_CMD 0xD0 /* SPU Only */
+#define MFC_PUTLLC_CMD 0xB4 /* SPU Only */
+#define MFC_PUTLLUC_CMD 0xB0 /* SPU Only */
+#define MFC_PUTQLLUC_CMD 0xB8 /* SPU Only */
+
+#define MFC_SNDSIG_CMD 0xA0
+#define MFC_SNDSIGB_CMD 0xA1
+#define MFC_SNDSIGF_CMD 0xA2
+#define MFC_BARRIER_CMD 0xC0
+#define MFC_EIEIO_CMD 0xC8
+#define MFC_SYNC_CMD 0xCC
+
+
+/****************************************************************************/
+/* Definitions for constructing a 32-bit command word including the transfer
+ * and replacement class id and the command opcode.
+ */
+/****************************************************************************/
+#define MFC_TCLASS(_tid) ((_tid) << 24)
+#define MFC_RCLASS(_rid) ((_rid) << 16)
+/* NOTE(review): no cast is applied here (unlike MFC_SIZE/MFC_TAG below), so an int _tid >= 0x80 shifts into the sign bit -- confirm callers pass small or unsigned ids. */
+#define MFC_CMD_WORD(_tid, _rid, _cmd) (MFC_TCLASS(_tid) | MFC_RCLASS(_rid) | (_cmd))
+
+/****************************************************************************/
+/* Definitions for constructing a 64-bit command word including the size, tag,
+ * transfer and replacement class id and the command opcode.
+ */
+/****************************************************************************/
+#define MFC_SIZE(_size) ((unsigned long long)(_size) << 48)
+#define MFC_TAG(_tag_id) ((unsigned long long)(_tag_id) << 32)
+#define MFC_TR_CMD(_trcmd) ((unsigned long long)(_trcmd))
+
+#define MFC_CMD_DWORD(_size, _tag_id, _trcmd) (MFC_SIZE(_size) | MFC_TAG(_tag_id) | MFC_TR_CMD(_trcmd))
+
+/****************************************************************************/
+/* Mask definitions for obtaining DMA commands and class ids from packed words.
+ */
+/****************************************************************************/
+#define MFC_CMD_MASK 0x0000FFFF
+#define MFC_CLASS_MASK 0x000000FF
+
+/****************************************************************************/
+/* DMA max/min size definitions. */
+/****************************************************************************/
+#define MFC_MIN_DMA_SIZE_SHIFT 4 /* 16 bytes */
+#define MFC_MAX_DMA_SIZE_SHIFT 14 /* 16384 bytes */
+
+#define MFC_MIN_DMA_SIZE (1 << MFC_MIN_DMA_SIZE_SHIFT)
+#define MFC_MAX_DMA_SIZE (1 << MFC_MAX_DMA_SIZE_SHIFT)
+
+#define MFC_MIN_DMA_SIZE_MASK (MFC_MIN_DMA_SIZE - 1)
+#define MFC_MAX_DMA_SIZE_MASK (MFC_MAX_DMA_SIZE - 1)
+
+#define MFC_MIN_DMA_LIST_SIZE 0x0008 /* 8 bytes */
+#define MFC_MAX_DMA_LIST_SIZE 0x4000 /* 16K bytes */
+
+/****************************************************************************/
+/* Mask definition for checking proper address alignment. */
+/****************************************************************************/
+#define MFC_ADDR_MATCH_MASK 0xF
+#define MFC_BEST_ADDR_ALIGNMENT 0x80
+
+/****************************************************************************/
+/* Definitions related to the Proxy DMA Command Status register (DMA_CMDStatus).
+ */
+/****************************************************************************/
+#define MFC_PROXY_DMA_CMD_ENQUEUE_SUCCESSFUL 0x00
+#define MFC_PROXY_DMA_CMD_SEQUENCE_ERROR 0x01
+#define MFC_PROXY_DMA_QUEUE_FULL 0x02
+
+/****************************************************************************/
+/* Definitions related to the DMA Queue Status register (DMA_QStatus). */
+/****************************************************************************/
+#define MFC_PROXY_MAX_QUEUE_SPACE 0x08
+#define MFC_PROXY_DMA_Q_EMPTY 0x80000000
+#define MFC_PROXY_DMA_Q_FREE_SPACE_MASK 0x0000FFFF
+
+#define MFC_SPU_MAX_QUEUE_SPACE 0x10
+
+/****************************************************************************/
+/* Definitions related to the Proxy Tag-Group Query-Type register
+ * (Prxy_QueryType).
+ */
+/****************************************************************************/
+#define MFC_PROXY_DMA_QUERYTYPE_ANY 0x1
+#define MFC_PROXY_DMA_QUERYTYPE_ALL 0x2
+
+/****************************************************************************/
+/* Definitions related to the Proxy Tag-Group Query-Mask (Prxy_QueryMask)
+ * and PU Tag Status (DMA_TagStatus) registers.
+ *
+ * NOTE: Only the bottom 5 bits of the tag id value passed are used, to ensure
+ * that a valid tag id results.
+ */
+/****************************************************************************/
+
+#define MFC_TAGID_TO_TAGMASK(tag_id) (1 << ((tag_id) & 0x1F)) /* tag_id is parenthesized so a compound argument (e.g. a | b) is masked as a whole */
+
+/****************************************************************************/
+/* Definitions related to the Mailbox Status register (SPU_Mbox_Stat) and the
+ * depths of the outbound Mailbox Register (SPU_OutMbox), the outbound
+ * interrupting Mailbox Register (SPU_OutIntrMbox), and the inbound Mailbox
+ * Register (SPU_In_Mbox).
+ */
+/****************************************************************************/
+#define MFC_SPU_OUT_MBOX_COUNT_STATUS_MASK 0x000000FF
+#define MFC_SPU_OUT_MBOX_COUNT_STATUS_SHIFT 0x0
+#define MFC_SPU_IN_MBOX_COUNT_STATUS_MASK 0x0000FF00
+#define MFC_SPU_IN_MBOX_COUNT_STATUS_SHIFT 0x8
+#define MFC_SPU_OUT_INTR_MBOX_COUNT_STATUS_MASK 0x00FF0000
+#define MFC_SPU_OUT_INTR_MBOX_COUNT_STATUS_SHIFT 0x10
+
+/****************************************************************************/
+/* Definitions related to the SPC Multi Source Synchronization register
+ * (MFC_MSSync).
+ */
+/****************************************************************************/
+#define MFC_SPC_MSS_STATUS_MASK 0x1
+#define MFC_SPC_MSS_COMPLETE 0x0
+#define MFC_SPC_MSS_NOT_COMPLETE 0x1
+
+
+/*******************************************
+ * Channel Defines
+ *******************************************/
+
+/* Events Defines for channels:
+ * 0 (SPU_RdEventStat),
+ * 1 (SPU_WrEventMask), and
+ * 2 (SPU_WrEventAck).
+ */
+#define MFC_TAG_STATUS_UPDATE_EVENT 0x00000001
+#define MFC_LIST_STALL_NOTIFY_EVENT 0x00000002
+#define MFC_COMMAND_QUEUE_AVAILABLE_EVENT 0x00000008
+#define MFC_IN_MBOX_AVAILABLE_EVENT 0x00000010
+#define MFC_DECREMENTER_EVENT 0x00000020
+#define MFC_OUT_INTR_MBOX_AVAILABLE_EVENT 0x00000040
+#define MFC_OUT_MBOX_AVAILABLE_EVENT 0x00000080
+#define MFC_SIGNAL_NOTIFY_2_EVENT 0x00000100
+#define MFC_SIGNAL_NOTIFY_1_EVENT 0x00000200
+#define MFC_LLR_LOST_EVENT 0x00000400
+#define MFC_PRIV_ATTN_EVENT 0x00000800
+#define MFC_MULTI_SRC_SYNC_EVENT 0x00001000
+
+
+
+/* Tag Status Update defines for channel 23 (MFC_WrTagUpdate)
+ */
+#define MFC_TAG_UPDATE_IMMEDIATE 0x0
+#define MFC_TAG_UPDATE_ANY 0x1
+#define MFC_TAG_UPDATE_ALL 0x2
+
+/* Atomic Command Status defines for channel 27 (MFC_RdAtomicStat)
+ */
+#define MFC_PUTLLC_STATUS 0x00000001
+#define MFC_PUTLLUC_STATUS 0x00000002
+#define MFC_GETLLAR_STATUS 0x00000004
+
+#endif /* _CBEA_MFC_H_ */
diff --git a/Extras/software_cache/cache/include/defs.h b/Extras/software_cache/cache/include/defs.h
new file mode 100644
index 000000000..d15d9361a
--- /dev/null
+++ b/Extras/software_cache/cache/include/defs.h
@@ -0,0 +1,149 @@
+/* --------------------------------------------------------------- */
+/* PLEASE DO NOT MODIFY THIS SECTION */
+/* This prolog section is automatically generated. */
+/* */
+/* (C) Copyright 2001,2006, */
+/* International Business Machines Corporation, */
+/* */
+/* All Rights Reserved. */
+/* --------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+/* spe_cache_defs.h
+ *
+ * Copyright (C) 2005 IBM Corp.
+ *
+ * Internal definitions for software managed cache.
+ */
+
+#ifndef __SPE_CACHE_DEFS_H__
+#define __SPE_CACHE_DEFS_H__
+
+/**
+ ** Number of cache sets: a power of two from 2 to 1024.
+ ** Any other (or undefined) value silently selects the default, 64.
+ */
+#if (SPE_CACHE_NSETS==1024)
+#define SPE_CACHE_NSETS_SHIFT 10
+#elif (SPE_CACHE_NSETS==512)
+#define SPE_CACHE_NSETS_SHIFT 9
+#elif (SPE_CACHE_NSETS==256)
+#define SPE_CACHE_NSETS_SHIFT 8
+#elif (SPE_CACHE_NSETS==128)
+#define SPE_CACHE_NSETS_SHIFT 7
+#elif (SPE_CACHE_NSETS==64)
+#define SPE_CACHE_NSETS_SHIFT 6
+#elif (SPE_CACHE_NSETS==32)
+#define SPE_CACHE_NSETS_SHIFT 5
+#elif (SPE_CACHE_NSETS==16)
+#define SPE_CACHE_NSETS_SHIFT 4
+#elif (SPE_CACHE_NSETS==8)
+#define SPE_CACHE_NSETS_SHIFT 3
+#elif (SPE_CACHE_NSETS==4)
+#define SPE_CACHE_NSETS_SHIFT 2
+#elif (SPE_CACHE_NSETS==2)
+#define SPE_CACHE_NSETS_SHIFT 1
+#else /* undefined or unsupported value: fall back to the default */
+#undef SPE_CACHE_NSETS
+#define SPE_CACHE_NSETS 64
+#define SPE_CACHE_NSETS_SHIFT 6
+#endif
+
+/**
+ ** Cacheline size in bytes: a power of two from 32 to 512.
+ ** Any other (or undefined) value silently selects the default, 128.
+ */
+#if (SPE_CACHELINE_SIZE==512)
+#define SPE_CACHELINE_SHIFT 9
+#elif (SPE_CACHELINE_SIZE==256)
+#define SPE_CACHELINE_SHIFT 8
+#elif (SPE_CACHELINE_SIZE==128)
+#define SPE_CACHELINE_SHIFT 7
+#elif (SPE_CACHELINE_SIZE==64)
+#define SPE_CACHELINE_SHIFT 6
+#elif (SPE_CACHELINE_SIZE==32)
+#define SPE_CACHELINE_SHIFT 5
+#else /* undefined or unsupported value: fall back to the default */
+#undef SPE_CACHELINE_SIZE
+#define SPE_CACHELINE_SIZE 128
+#define SPE_CACHELINE_SHIFT 7
+#endif
+
+/**
+ ** Defn's derived from above settings.
+ */
+#define SPE_CACHE_NSETS_MASK (SPE_CACHE_NSETS - 1)
+#define SPE_CACHELINE_MASK (SPE_CACHELINE_SIZE - 1)
+
+/**
+ ** Defn's for managing cacheline state.
+ */
+#define SPE_CACHELINE_DIRTY 0x1
+#define SPE_CACHELINE_LOCKED 0x2
+#define SPE_CACHELINE_STATE_MASK (SPE_CACHELINE_DIRTY | SPE_CACHELINE_LOCKED)
+
+#ifdef _XLC
+/**
+ * FIXME: For now disable manual branch hints
+ * on XLC due to performance degradation.
+ */
+#ifndef likely
+#define likely(_c) (_c)
+#define unlikely(_c) (_c)
+#endif
+/* NOTE(review): in both branches `unlikely` is defined only under the `likely` guard; code that pre-defines just `likely` gets no `unlikely`. */
+#else /* !_XLC */
+/* Manual branch hints via __builtin_expect; the condition is passed through as-is (no !! normalization). */
+#ifndef likely
+#define likely(_c) __builtin_expect((_c), 1)
+#define unlikely(_c) __builtin_expect((_c), 0)
+#endif
+#endif /* _XLC */
+
+
+/**
+ ** Debug controls: _spe_cache_panic_ always maps to assert(); _spe_cache_assert_
+ ** does so only with -DSPE_CACHE_DBG. -DNDEBUG disables both (standard assert.h behaviour).
+ */
+#include <assert.h>
+#define _spe_cache_panic_(c) assert(c)
+#ifdef SPE_CACHE_DBG
+#define _spe_cache_assert_(c) assert(c)
+#else
+#define _spe_cache_assert_(c) /* No-op. */
+#endif
+
+#define _spe_cacheline_byte_offset_(ea) \
+ ((ea) & SPE_CACHELINE_MASK)
+
+#define _spe_cacheline_byte_offset_x4(ea) \
+ spu_and ((ea), SPE_CACHELINE_MASK)
+
+/* Build a vector from four scalars in argument order: word 0 = ui1 ... word 3 = ui4.
+ * VEC_LITERAL and the SPU intrinsics are not declared in this header as far as visible; the includer must provide them. */
+static __inline vector unsigned int _load_vec_uint4(unsigned int ui1, unsigned int ui2, unsigned int ui3, unsigned int ui4)
+{
+  /* Control bytes 0x00-0x03 / 0x10-0x13 pick word 0 of the 1st / 2nd operand; 0x80 yields a zero byte. */
+  vector unsigned char shuffle = VEC_LITERAL(vector unsigned char,
+    0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+  vector unsigned int iv1 = spu_promote(ui1, 0);
+  vector unsigned int iv2 = spu_promote(ui2, 0);
+  vector unsigned int iv3 = spu_promote(ui3, 0);
+  vector unsigned int iv4 = spu_promote(ui4, 0);
+
+  /* Rotating the pattern by 8 bytes retargets it at words 2-3, so OR-ing the two halves merges them. */
+  return spu_or(spu_shuffle(iv1, iv2, shuffle), spu_shuffle(iv3, iv4, spu_rlqwbyte(shuffle, 8)));
+}
+
+/* Pack word 0 of each of the four input vectors into one vector, in argument order. */
+static __inline vector unsigned int _pack_vec_uint4(vector unsigned int ui1, vector unsigned int ui2, vector unsigned int ui3, vector unsigned int ui4)
+{
+  /* Same word-0 gather pattern as _load_vec_uint4: as written for ui1/ui2, rotated by 8 bytes for ui3/ui4. */
+  vector unsigned char shuffle = VEC_LITERAL(vector unsigned char,
+    0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+
+  return spu_or(spu_shuffle(ui1, ui2, shuffle), spu_shuffle(ui3, ui4, spu_rlqwbyte(shuffle, 8)));
+}
+
+#endif /* __SPE_CACHE_DEFS_H__ -- closed after the inline helpers so a second inclusion cannot redefine them */
diff --git a/Extras/software_cache/cache/include/dma.h b/Extras/software_cache/cache/include/dma.h
new file mode 100644
index 000000000..fdcc8313c
--- /dev/null
+++ b/Extras/software_cache/cache/include/dma.h
@@ -0,0 +1,40 @@
+/* --------------------------------------------------------------- */
+/* PLEASE DO NOT MODIFY THIS SECTION */
+/* This prolog section is automatically generated. */
+/* */
+/* (C) Copyright 2001,2006, */
+/* International Business Machines Corporation, */
+/* */
+/* All Rights Reserved. */
+/* --------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+/* dma.h
+ *
+ * Copyright (C) 2005 IBM Corp.
+ *
+ * Internal DMA utilities for software
+ * managed cache.
+ */
+
+#ifndef __SPE_CACHE_DMA_H__
+#define __SPE_CACHE_DMA_H__
+/* Both shift operands are defined outside this header; SPE_CACHE_TAGID_SHIFT is not referenced again in this file. */
+#define SPE_CACHE_TAGID_SHIFT (SPE_CACHELINE_SHIFT + SPE_CACHE_NWAY_SHIFT)
+/* The per-line tag macros ignore their argument: every line uses tag id 16 and the matching mask 1 << 16. */
+#define _SPE_CACHELINE_TAGID(_ptr) (16)
+#define _SPE_CACHELINE_TAGMASK(_ptr) (1 << 16)
+
+#define SPE_CACHELINE_TAGID(_line) \
+ _SPE_CACHELINE_TAGID(&spe_cache_mem[_line])
+#define SPE_CACHELINE_TAGMASK(_line) \
+ _SPE_CACHELINE_TAGMASK(&spe_cache_mem[_line])
+/* Per-set tag id: overridable by defining SPE_CACHE_SET_TAGID first; the default keeps the low five bits of the set. */
+#ifndef SPE_CACHE_SET_TAGID
+#define SPE_CACHE_SET_TAGID(set) ((set) & 0x1f)
+#endif
+#define SPE_CACHE_SET_TAGMASK(set) (1 << SPE_CACHE_SET_TAGID(set))
+/* Write-back uses MFC_PUTF_CMD (0x22 = MFC_PUT_CMD | MFC_FENCE_ENABLE in cbe_mfc.h); fill uses plain MFC_GET_CMD. */
+#define SPE_CACHE_PUT MFC_PUTF_CMD
+#define SPE_CACHE_GET MFC_GET_CMD
+
+#endif /* __SPE_CACHE_DMA_H__ */
diff --git a/Extras/software_cache/cache/include/ilog2.h b/Extras/software_cache/cache/include/ilog2.h
new file mode 100644
index 000000000..86a5ca865
--- /dev/null
+++ b/Extras/software_cache/cache/include/ilog2.h
@@ -0,0 +1,35 @@
+/* @(#)12 1.5 src/lib/math/ilog2.h, sw.lib, sdk_pub 10/11/05 15:35:56 */
+/* -------------------------------------------------------------- */
+/* (C) Copyright 2001,2005, */
+/* International Business Machines Corporation, */
+/* Sony Computer Entertainment Incorporated, */
+/* Toshiba Corporation. */
+/* */
+/* All Rights Reserved. */
+/* -------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+#ifndef _ILOG2_H_
+#define _ILOG2_H_ 1
+
+/*
+ * FUNCTION
+ * signed int _ilog2(signed int x)
+ *
+ * DESCRIPTION
+ * _ilog2 computes ceiling of log (base 2) of the input value x.
+ * The input value, x, must be a non-zero positive value.
+ */
+
+static __inline signed int _ilog2(signed int x)
+{
+#ifdef __SPU__
+    /* ceil(log2(x)) is the bit width of (x - 1): 32 minus its leading zeros. */
+    unsigned int lead_zeros = spu_extract(spu_cntlz(spu_promote(x - 1, 0)), 0);
+
+    return (32 - lead_zeros);
+#else
+    /* Portable fallback: count how many right shifts empty out (x - 1). */
+    signed int bits = 0;
+    signed int rest = x - 1;
+
+    while (rest > 0) {
+        bits++;
+        rest >>= 1;
+    }
+    return (bits);
+#endif
+}
+
+#endif /* _ILOG2_H_ */
diff --git a/Extras/software_cache/cache/include/memset.h b/Extras/software_cache/cache/include/memset.h
new file mode 100644
index 000000000..b070d3e35
--- /dev/null
+++ b/Extras/software_cache/cache/include/memset.h
@@ -0,0 +1,68 @@
+/* @(#)85 1.4 src/lib/c/memset.h, sw.lib, sdk_pub 10/13/05 10:17:09 */
+/* -------------------------------------------------------------- */
+/* (C) Copyright 2001,2005, */
+/* International Business Machines Corporation, */
+/* Sony Computer Entertainment Incorporated, */
+/* Toshiba Corporation. */
+/* */
+/* All Rights Reserved. */
+/* -------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+#include
+#include
+
+/* Fills the first n bytes of the memory area pointed to by s
+ * with the constant byte c. Returns a pointer to the memory area s.
+ *
+ * The fill is done a quadword (16 bytes) at a time; partial quadwords at
+ * either end are merged with the existing contents using a byte mask.
+ */
+static __inline void * _memset(void *s, int c, size_t n)
+{
+ int skip, cnt, i;
+ vec_uchar16 *vs;
+ vec_uchar16 vc, mask;
+
+ vs = (vec_uchar16 *)(s);
+ /* vc holds the fill byte replicated into all 16 byte lanes. */
+ vc = spu_splats((unsigned char)c);
+ /* cnt tracks the number of bytes still to be written. */
+ cnt = (int)(n);
+
+ /* Handle any leading partial quadword as well as
+ * very short settings (i.e., such that the n characters
+ * all reside in a single quadword).
+ */
+ skip = (int)(s) & 15;
+ if (skip) {
+ /* Select bytes skip..15 of the first quadword. */
+ mask = spu_rlmaskqwbyte((vec_uchar16)(-1), 0-skip);
+ cnt -= 16 - skip;
+ if (cnt < 0) {
+ /* The region ends inside this quadword: also drop the trailing -cnt bytes. */
+ mask = spu_and(mask, spu_slqwbyte((vec_uchar16)(-1), (unsigned int)(-cnt)));
+ }
+ *vs = spu_sel(*vs, vc, mask);
+ vs++;
+ }
+
+ /* Handle 8 quadwords at a time
+ */
+ /* NOTE(review): the statement below is not valid C as it stands; the loop
+ * header and the code up to the trailing-bytes test appear to have been
+ * lost from this copy. Restore it from the original memset.h before use.
+ */
+ for (i=127; i 0) {
+ /* Select only the first cnt bytes of the final quadword. */
+ mask = spu_slqwbyte((vec_uchar16)(-1), (unsigned int)(16-cnt));
+ *vs = spu_sel(*vs, vc, mask);
+ }
+
+ return (s);
+}
diff --git a/Extras/software_cache/cache/include/nway-lookup.h b/Extras/software_cache/cache/include/nway-lookup.h
new file mode 100644
index 000000000..33e552c6d
--- /dev/null
+++ b/Extras/software_cache/cache/include/nway-lookup.h
@@ -0,0 +1,194 @@
+/* --------------------------------------------------------------- */
+/* PLEASE DO NOT MODIFY THIS SECTION */
+/* This prolog section is automatically generated. */
+/* */
+/* (C) Copyright 2001,2006, */
+/* International Business Machines Corporation, */
+/* */
+/* All Rights Reserved. */
+/* --------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+/* nway-lookup.h
+ *
+ * Copyright (C) 2005 IBM Corp.
+ *
+ * Internal lookup operations for software
+ * managed cache.
+ *
+ * See nway-opt.h for "optimized" nway
+ * lookup operations.
+ */
+
+#ifndef __SPE_CACHE_NWAY_LOOKUP_H_
+#define __SPE_CACHE_NWAY_LOOKUP_H_
+
+
+/**
+ * _decl_set_entries_
+ * Declare a vec_uint4 called 'name' and load it with
+ * the four directory entries that start at
+ * spe_cache_dir[set][index].
+ *
+ * NOTE(review): no dirty bit is masked off here; the
+ * entries are loaded exactly as stored.
+ */
+#define _decl_set_entries_(set, name, index) \
+ vec_uint4 name = *((vec_uint4 *) &spe_cache_dir[set][index])
+
+
+/**
+ * _spe_cache_4_way_lookup_
+ * Compare 'ea' against the four directory entries of
+ * 'set' and return the spu_gather() of the per-entry
+ * compare results. The result is zero when no entry
+ * matches.
+ */
+#define _spe_cache_4_way_lookup_(set, ea) \
+({ \
+ _decl_set_entries_(set, e0123, 0); \
+ spu_gather(spu_cmpeq(e0123, ea)); \
+})
+
+/**
+ * _spe_cache_set_lookup_
+ * Compare 'ea' against all entries of
+ * a set, and return a result that is
+ * consistent with spu_gather().
+ *
+ * Currently a plain alias for the 4-way lookup.
+ */
+#define _spe_cache_set_lookup_(set, ea) \
+ _spe_cache_4_way_lookup_(set, ea)
+
+
+/**
+ * _spe_cache_nway_lookup_x4
+ * Declare local variables and lookup four addresses
+ * in the n-way set associative cache. Upon return,
+ * 'set_x4' holds the set number of each address and
+ * 'idx_x4' contains the matching elements in the sets,
+ * or -1 if not found.
+ *
+ * splat0..splat3 are shuffle patterns that replicate
+ * word 0..3 of the line-aligned address vector across
+ * a whole quadword, so each address can be compared
+ * against all four entries of its set at once.
+ */
+#define _spe_cache_nway_lookup_x4(ea_x4, set_x4, idx_x4) \
+({ \
+ vector unsigned int ea_aligned_x4 = spu_and ((ea_x4), ~SPE_CACHELINE_MASK); \
+ vector unsigned char splat0 = VEC_LITERAL(vector unsigned char, \
+ 0x00, 0x01, 0x02, 0x03, \
+ 0x00, 0x01, 0x02, 0x03, \
+ 0x00, 0x01, 0x02, 0x03, \
+ 0x00, 0x01, 0x02, 0x03); \
+ vector unsigned char splat1 = VEC_LITERAL(vector unsigned char, \
+ 0x04, 0x05, 0x06, 0x07, \
+ 0x04, 0x05, 0x06, 0x07, \
+ 0x04, 0x05, 0x06, 0x07, \
+ 0x04, 0x05, 0x06, 0x07); \
+ vector unsigned char splat2 = VEC_LITERAL(vector unsigned char, \
+ 0x08, 0x09, 0x0a, 0x0b, \
+ 0x08, 0x09, 0x0a, 0x0b, \
+ 0x08, 0x09, 0x0a, 0x0b, \
+ 0x08, 0x09, 0x0a, 0x0b); \
+ vector unsigned char splat3 = VEC_LITERAL(vector unsigned char, \
+ 0x0c, 0x0d, 0x0e, 0x0f, \
+ 0x0c, 0x0d, 0x0e, 0x0f, \
+ 0x0c, 0x0d, 0x0e, 0x0f, \
+ 0x0c, 0x0d, 0x0e, 0x0f); \
+ vec_uint4 ea_aligned0 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat0); \
+ vec_uint4 ea_aligned1 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat1); \
+ vec_uint4 ea_aligned2 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat2); \
+ vec_uint4 ea_aligned3 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat3); \
+ vec_uint4 found0, found1, found2, found3; \
+ vec_uint4 found_x4; \
+ (set_x4) = _spe_cache_set_num_x4(ea_x4); \
+ found0 = _spe_cache_set_lookup_(spu_extract (set_x4, 0), ea_aligned0); \
+ found1 = _spe_cache_set_lookup_(spu_extract (set_x4, 1), ea_aligned1); \
+ found2 = _spe_cache_set_lookup_(spu_extract (set_x4, 2), ea_aligned2); \
+ found3 = _spe_cache_set_lookup_(spu_extract (set_x4, 3), ea_aligned3); \
+ found_x4 = _pack_vec_uint4 (found0, found1, found2, found3); \
+ (idx_x4) = (vector signed int)_spe_cache_idx_num_x4(found_x4); \
+})
+
+/**
+ * _spe_cache_nway_lookup_
+ * Scalar lookup of one address. 'set' and 'idx' are
+ * output arguments: 'set' receives the set number of
+ * 'ea' and 'idx' the matching entry within that set
+ * (callers test idx < 0 to detect a miss).
+ * 'ea' is evaluated more than once.
+ */
+#define _spe_cache_nway_lookup_(ea, set, idx) \
+({ \
+ unsigned int ea_aligned = (ea) & ~SPE_CACHELINE_MASK; \
+ vec_uint4 ea_aligned4 = spu_splats(ea_aligned); \
+ vec_uint4 found; \
+ (set) = _spe_cache_set_num_(ea); \
+ found = _spe_cache_set_lookup_(set, ea_aligned4); \
+ (idx) = _spe_cache_idx_num_(found); \
+})
+
+/**
+ * _spe_cache_lookup_
+ * Lookup and return the LSA of an EA
+ * that is known to be in the cache.
+ *
+ * The lookup result is not checked for a miss, and
+ * 'is_write' is not used by the expansion.
+ */
+#define _spe_cache_lookup_(ea, is_write) \
+({ \
+ int set, idx, line, byte; \
+ _spe_cache_nway_lookup_(ea, set, idx); \
+ \
+ line = _spe_cacheline_num_(set, idx); \
+ byte = _spe_cacheline_byte_offset_(ea); \
+ (void *) &spe_cache_mem[line + byte]; \
+})
+
+/**
+ * _spe_cache_wait_
+ * Wait for transfer of a cache line
+ * to complete.
+ *
+ * Writes the per-line tag mask to channel 22
+ * (presumably the MFC tag-mask channel - confirm
+ * against the channel map) and then blocks until
+ * all selected tag groups have completed.
+ */
+#define _spe_cache_wait_(_lsa) \
+({ \
+ spu_writech(22, _SPE_CACHELINE_TAGMASK(_lsa)); \
+ spu_mfcstat(MFC_TAG_UPDATE_ALL); \
+})
+
+/**
+ * _spe_cache_lookup_wait_
+ * Lookup and return the LSA of an EA
+ * that is known to be in the cache,
+ * and guarantee that its transfer is
+ * complete.
+ *
+ * The wait uses the per-set tag mask, which is the
+ * tag the miss handler issues its DMA under.
+ * 'is_write' is not used by the expansion.
+ */
+#define _spe_cache_lookup_wait_(ea, is_write) \
+({ \
+ int set, idx, line, byte; \
+ _spe_cache_nway_lookup_(ea, set, idx); \
+ \
+ line = _spe_cacheline_num_(set, idx); \
+ byte = _spe_cacheline_byte_offset_(ea); \
+ spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \
+ spu_mfcstat(MFC_TAG_UPDATE_ALL); \
+ (void *) &spe_cache_mem[line + byte]; \
+})
+
+/**
+ * _spe_cache_lookup_xfer_
+ * Lookup and return the LSA of an EA, where
+ * the line may either be in the cache or not.
+ * If not, initiate transfer but do not wait
+ * for completion.
+ *
+ * The returned address must not be dereferenced
+ * until the transfer has been waited for.
+ * 'is_write' and 'rb' are not used by the expansion.
+ */
+#define _spe_cache_lookup_xfer_(ea, is_write, rb) \
+({ \
+ int set, idx, line, byte; \
+ _spe_cache_nway_lookup_(ea, set, idx); \
+ \
+ if (unlikely(idx < 0)) { \
+ idx = _spe_cache_miss_(ea, set, -1); \
+ } \
+ line = _spe_cacheline_num_(set, idx); \
+ byte = _spe_cacheline_byte_offset_(ea); \
+ (void *) &spe_cache_mem[line + byte]; \
+})
+
+/**
+ * _spe_cache_lookup_xfer_wait_
+ * Lookup and return the LSA of an EA, where
+ * the line may either be in the cache or not.
+ * If not, initiate transfer and guarantee
+ * completion.
+ *
+ * The wait happens only on the miss path; a hit on a
+ * line whose transfer was started earlier without a
+ * wait is returned without waiting.
+ * 'is_write' and 'rb' are not used by the expansion.
+ */
+#define _spe_cache_lookup_xfer_wait_(ea, is_write, rb) \
+({ \
+ int set, idx, line, byte; \
+ _spe_cache_nway_lookup_(ea, set, idx); \
+ \
+ if (unlikely(idx < 0)) { \
+ idx = _spe_cache_miss_(ea, set, -1); \
+ spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \
+ spu_mfcstat(MFC_TAG_UPDATE_ALL); \
+ } \
+ line = _spe_cacheline_num_(set, idx); \
+ byte = _spe_cacheline_byte_offset_(ea); \
+ (void *) &spe_cache_mem[line + byte]; \
+})
+
+#endif
diff --git a/Extras/software_cache/cache/include/nway-miss.h b/Extras/software_cache/cache/include/nway-miss.h
new file mode 100644
index 000000000..4c73ae8bf
--- /dev/null
+++ b/Extras/software_cache/cache/include/nway-miss.h
@@ -0,0 +1,51 @@
+/* --------------------------------------------------------------- */
+/* PLEASE DO NOT MODIFY THIS SECTION */
+/* This prolog section is automatically generated. */
+/* */
+/* (C) Copyright 2001,2006, */
+/* International Business Machines Corporation, */
+/* */
+/* All Rights Reserved. */
+/* --------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+/* nway-miss.h
+ *
+ * Copyright (C) 2005 IBM Corp.
+ *
+ * Internal handler for cache misses.
+ */
+
+#ifndef __SPE_CACHE_NWAY_MISS_H__
+#define __SPE_CACHE_NWAY_MISS_H__
+
+/* Cache miss handler.
+ *
+ * ea    - effective address that missed
+ * set   - set number that 'ea' maps to
+ * avail - bit mask of the ways in the set that may be replaced
+ *
+ * Returns the way index within 'set' that holds (or will hold, once the
+ * DMA issued here completes) the line containing 'ea'. The caller is
+ * responsible for waiting on the set's tag before touching the data.
+ */
+static int _spe_cache_miss_(unsigned int ea, int set, int avail)
+{
+    unsigned int line_ea = ea & ~SPE_CACHELINE_MASK;
+    vec_uint4 hit = _spe_cache_set_lookup_(set, line_ea);
+    vec_uint4 victim;
+    int way, lsa_off;
+
+    /* Re-check the directory: the line may already have been allocated
+     * in this set when several lookups are performed together.
+     */
+    if (unlikely(spu_extract(hit, 0) != 0)) {
+        return _spe_cache_idx_num_(hit);
+    }
+
+    /* Choose the way to replace and locate its storage. */
+    victim = _spe_cache_replace_(set, avail);
+    way = _spe_cache_idx_num_(victim);
+    lsa_off = _spe_cacheline_num_(set, way);
+
+    /* Start filling the line; completion is signalled on the set's tag. */
+    spu_mfcdma32(&spe_cache_mem[lsa_off], line_ea, SPE_CACHELINE_SIZE,
+                 SPE_CACHE_SET_TAGID(set), SPE_CACHE_GET);
+
+    /* Record the new owner of the way in the directory. */
+    spe_cache_dir[set][SPE_CACHE_NWAY_MASK - way] = line_ea;
+
+    return way;
+}
+#endif
diff --git a/Extras/software_cache/cache/include/nway-opt.h b/Extras/software_cache/cache/include/nway-opt.h
new file mode 100644
index 000000000..6a96773c6
--- /dev/null
+++ b/Extras/software_cache/cache/include/nway-opt.h
@@ -0,0 +1,153 @@
+/* --------------------------------------------------------------- */
+/* PLEASE DO NOT MODIFY THIS SECTION */
+/* This prolog section is automatically generated. */
+/* */
+/* (C) Copyright 2001,2006, */
+/* International Business Machines Corporation, */
+/* */
+/* All Rights Reserved. */
+/* --------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+/* nway-opt.h
+ *
+ * Copyright (C) 2006 IBM Corp.
+ *
+ * "Optimized" lookup operations for n-way set associative
+ * software managed cache.
+ */
+#include
+
+#ifndef __SPE_CACHE_NWAY_OPT_H_
+#define __SPE_CACHE_NWAY_OPT_H_
+
+/* __spe_cache_rd
+ * Look up and return data from the cache. If the data
+ * is not currently in cache then transfer it from main
+ * storage.
+ *
+ * This code uses a conditional branch to the cache miss
+ * handler in the event that the requested data is not
+ * in the cache. A branch hint is used to avoid paying
+ * the branch stall penalty.
+ *
+ * type - type of the value to read from the cache line
+ * ea   - effective address; evaluated more than once
+ *
+ * On a miss the macro waits for the set's DMA tag before
+ * reading; on a hit it reads without waiting.
+ */
+#define __spe_cache_rd(type, ea) \
+({ \
+ int set, idx, lnum, byte; \
+ type ret; \
+ _spe_cache_nway_lookup_(ea, set, idx); \
+ if (unlikely(idx < 0)) { \
+ idx = _spe_cache_miss_(ea, set, -1); \
+ spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \
+ spu_mfcstat(MFC_TAG_UPDATE_ALL); \
+ } \
+ lnum = _spe_cacheline_num_(set, idx); \
+ byte = _spe_cacheline_byte_offset_(ea); \
+ ret = *((type *) (&spe_cache_mem[lnum + byte])); \
+ ret; \
+})
+
+/**
+ * __spe_cache_rd_x4
+ * Fetch four data elements from the cache.
+ *
+ * type  - type of each element as stored in the cache line;
+ *         each element is returned in a 32-bit word slot
+ * ea_x4 - vector of four effective addresses
+ *
+ * Evaluates to a vector unsigned int holding the four
+ * elements, in the same order as the addresses.
+ *
+ * This code uses one conditional branch in
+ * the event that any of the four elements
+ * are missing.
+ *
+ * On a miss, light weight locking is used to
+ * avoid casting out entries that were found.
+ * Further, we wait just once for the transfers,
+ * allowing for parallel [rather than serial]
+ * transfers.
+ */
+
+#define __spe_cache_rd_x4(type, ea_x4) \
+({ \
+    vector unsigned int missing; \
+    unsigned int ms; \
+    vector unsigned int cindex; \
+    unsigned int d0, d1, d2, d3; \
+    vector unsigned int s_x4; \
+    vector signed int i_x4; \
+    vector unsigned int ibyte, iline; \
+    vector unsigned int ret; \
+    unsigned int idx0, idx1, idx2, idx3; \
+ \
+    _spe_cache_nway_lookup_x4(ea_x4, s_x4, i_x4); \
+    /* An index of -1 keeps a set low bit after the shift; hits become 0. */ \
+    missing = spu_rlmask ((vector unsigned int)i_x4, -8); \
+    ms = spu_extract (spu_gather (missing), 0); \
+ \
+    ibyte = _spe_cacheline_byte_offset_x4(ea_x4); \
+ \
+    iline = _spe_cacheline_num_x4(s_x4, \
+                                  (vector unsigned int)i_x4); \
+ \
+    cindex = spu_add (iline, ibyte); \
+ \
+    idx0 = spu_extract (cindex, 0); \
+    idx1 = spu_extract (cindex, 1); \
+    idx2 = spu_extract (cindex, 2); \
+    idx3 = spu_extract (cindex, 3); \
+ \
+    /* Speculative loads; redone below if anything missed. */ \
+    d0 = *((type *) (&spe_cache_mem[idx0])); \
+    d1 = *((type *) (&spe_cache_mem[idx1])); \
+    d2 = *((type *) (&spe_cache_mem[idx2])); \
+    d3 = *((type *) (&spe_cache_mem[idx3])); \
+ \
+    ret = _load_vec_uint4 (d0, d1, d2, d3); \
+ \
+    if (unlikely(ms)) { \
+        int b0 = spu_extract (ibyte, 0); \
+        int b1 = spu_extract (ibyte, 1); \
+        int b2 = spu_extract (ibyte, 2); \
+        int b3 = spu_extract (ibyte, 3); \
+        int lnum0; \
+        int lnum1; \
+        int lnum2; \
+        int lnum3; \
+        int s0 = spu_extract (s_x4, 0); \
+        int s1 = spu_extract (s_x4, 1); \
+        int s2 = spu_extract (s_x4, 2); \
+        int s3 = spu_extract (s_x4, 3); \
+        int i0 = spu_extract (i_x4, 0); \
+        int i1 = spu_extract (i_x4, 1); \
+        int i2 = spu_extract (i_x4, 2); \
+        int i3 = spu_extract (i_x4, 3); \
+        unsigned int ea0 = spu_extract(ea_x4, 0); \
+        unsigned int ea1 = spu_extract(ea_x4, 1); \
+        unsigned int ea2 = spu_extract(ea_x4, 2); \
+        unsigned int ea3 = spu_extract(ea_x4, 3); \
+        int avail = -1; \
+ \
+        /* Ways that already hit must not be chosen for replacement. */ \
+        avail &= ~(((i0 < 0) ? 0 : (1 << i0)) | \
+                   ((i1 < 0) ? 0 : (1 << i1)) | \
+                   ((i2 < 0) ? 0 : (1 << i2)) | \
+                   ((i3 < 0) ? 0 : (1 << i3))); \
+ \
+        i0 = _spe_cache_miss_(ea0, s0, avail); \
+        avail &= ~(1 << i0); \
+        i1 = _spe_cache_miss_(ea1, s1, avail); \
+        avail &= ~(1 << i1); \
+        i2 = _spe_cache_miss_(ea2, s2, avail); \
+        avail &= ~(1 << i2); \
+        i3 = _spe_cache_miss_(ea3, s3, avail); \
+ \
+        lnum0 = _spe_cacheline_num_(s0, i0); \
+        lnum1 = _spe_cacheline_num_(s1, i1); \
+        lnum2 = _spe_cacheline_num_(s2, i2); \
+        lnum3 = _spe_cacheline_num_(s3, i3); \
+ \
+        /* Wait once, on the tags of all four sets. The miss handler */ \
+        /* issues each fill under SPE_CACHE_SET_TAGID of its own set. */ \
+        spu_writech(22, SPE_CACHE_SET_TAGMASK(s0) | \
+                        SPE_CACHE_SET_TAGMASK(s1) | \
+                        SPE_CACHE_SET_TAGMASK(s2) | \
+                        SPE_CACHE_SET_TAGMASK(s3)); \
+        spu_mfcstat(MFC_TAG_UPDATE_ALL); \
+ \
+        d0 = *((type *) (&spe_cache_mem[lnum0 + b0])); \
+        d1 = *((type *) (&spe_cache_mem[lnum1 + b1])); \
+        d2 = *((type *) (&spe_cache_mem[lnum2 + b2])); \
+        d3 = *((type *) (&spe_cache_mem[lnum3 + b3])); \
+ \
+        ret = _load_vec_uint4 (d0, d1, d2, d3); \
+    } \
+    ret; \
+})
+
+#endif /* _SPE_CACHE_NWAY_OPT_H_ */
diff --git a/Extras/software_cache/cache/include/nway-replace.h b/Extras/software_cache/cache/include/nway-replace.h
new file mode 100644
index 000000000..72fce1876
--- /dev/null
+++ b/Extras/software_cache/cache/include/nway-replace.h
@@ -0,0 +1,38 @@
+/* --------------------------------------------------------------- */
+/* PLEASE DO NOT MODIFY THIS SECTION */
+/* This prolog section is automatically generated. */
+/* */
+/* (C) Copyright 2001,2006, */
+/* International Business Machines Corporation, */
+/* */
+/* All Rights Reserved. */
+/* --------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+/* nway-replace.h
+ *
+ * Copyright (C) 2005 IBM Corp.
+ *
+ * Implement replacement for software
+ * managed cache.
+ */
+
+#ifndef __SPE_CACHE_NWAY_REPLACE_H_
+#define __SPE_CACHE_NWAY_REPLACE_H_
+
+/* Per-set replacement counter; word element 0 holds the next way to try. */
+static vec_uint4 spe_cache_replace_cntr[SPE_CACHE_NSETS+1];
+
+/* Pick the way(s) of 'set' that may be replaced.
+ *
+ * set   - set number
+ * avail - bit mask of ways that are allowed to be replaced
+ *
+ * Returns a vector whose word element 0 is a way bit mask: the single bit
+ * of the counter's current way when that way is allowed, otherwise every
+ * allowed way. The set's counter is advanced by one way on every call.
+ */
+static inline vec_uint4 _spe_cache_replace_(int set, int avail)
+{
+    unsigned int usable = avail & ((1 << SPE_CACHE_NWAY) - 1);
+    unsigned int way = spu_extract(spe_cache_replace_cntr[set], 0) & SPE_CACHE_NWAY_MASK;
+    unsigned int way_bit = (1 << way);
+
+    /* Advance the counter, wrapping at the number of ways. */
+    spe_cache_replace_cntr[set] = (vec_uint4) spu_promote((way + 1) & SPE_CACHE_NWAY_MASK, 0);
+
+    if (usable & way_bit) {
+        usable = way_bit;
+    }
+    return (vec_uint4) spu_promote(usable, 0);
+}
+
+#endif
diff --git a/Extras/software_cache/cache/include/nway.h b/Extras/software_cache/cache/include/nway.h
new file mode 100644
index 000000000..494cd6bea
--- /dev/null
+++ b/Extras/software_cache/cache/include/nway.h
@@ -0,0 +1,105 @@
+/* --------------------------------------------------------------- */
+/* PLEASE DO NOT MODIFY THIS SECTION */
+/* This prolog section is automatically generated. */
+/* */
+/* (C) Copyright 2001,2006, */
+/* International Business Machines Corporation, */
+/* */
+/* All Rights Reserved. */
+/* --------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+/* nway.h
+ *
+ * Copyright (C) 2005 IBM Corp.
+ *
+ * Support for n-way set associative software
+ * managed cache. The 4-way associative cache
+ * is the only interface exposed currently.
+ */
+
+#ifndef __SPE_CACHE_NWAY_H_
+#define __SPE_CACHE_NWAY_H_
+
+/**
+ ** Defn's for n-way set associativity.
+ ** Default is 4-way.
+ **
+ ** SPE_CACHE_NWAY_SHIFT is used to multiply a set number
+ ** by the number of ways, so it must equal
+ ** log2(SPE_CACHE_NWAY).
+ */
+#define SPE_CACHE_NWAY 4
+#define SPE_CACHE_NWAY_SHIFT 2
+
+/* Derived values: way index mask, total number of lines, total bytes. */
+#define SPE_CACHE_NWAY_MASK (SPE_CACHE_NWAY - 1)
+#define SPE_CACHE_NENTRIES (SPE_CACHE_NWAY * SPE_CACHE_NSETS)
+#define SPE_CACHE_MEM_SIZE (SPE_CACHE_NENTRIES * SPE_CACHELINE_SIZE)
+
+/* Map an effective address to its set number: the line number is XORed
+ * with itself shifted right by a further two bits, then reduced to the
+ * number of sets. 'ea' is evaluated twice.
+ */
+#define _spe_cache_set_num_(ea) \
+({ \
+ unsigned int ead, eadm, ret; \
+ ead = ((ea) >> SPE_CACHELINE_SHIFT); \
+ eadm = ((ea) >> (SPE_CACHELINE_SHIFT+2)); \
+ ret = (ead ^ eadm) & SPE_CACHE_NSETS_MASK; \
+ ret; \
+})
+
+/* Vector form of the set-number hash: maps four effective addresses to
+ * their set numbers. It uses the same shifts as the scalar
+ * _spe_cache_set_num_ (line shift and line shift + 2), so both paths
+ * place a given address in the same set.
+ */
+#define _spe_cache_set_num_x4(ea_x4) \
+({ \
+    vector unsigned int tmp0; \
+    vector unsigned int tmp1; \
+    tmp0 = spu_rlmask (ea_x4, -(SPE_CACHELINE_SHIFT)); \
+    tmp1 = spu_rlmask (ea_x4, -(SPE_CACHELINE_SHIFT+2)); \
+    spu_and (spu_xor (tmp0, tmp1), SPE_CACHE_NSETS_MASK); \
+})
+
+/* Convert a lookup result into a way index: 31 minus the count of leading
+ * zeros, i.e. the position of the highest set bit. A result of zero (no
+ * match) yields 31 - 32, which callers treat as -1.
+ */
+#define _spe_cache_idx_num_x4(found) \
+ spu_sub((unsigned int) 31, spu_cntlz(found))
+
+/* Scalar form: same computation, taken from word element 0. */
+#define _spe_cache_idx_num_(found) \
+ spu_extract(spu_sub((unsigned int) 31, spu_cntlz(found)), 0)
+
+/* Byte offset into spe_cache_mem of the line at way 'idx' of set 'set':
+ * (set * number of ways + idx) * line size. Both arguments are
+ * parenthesized so that expression arguments expand correctly.
+ */
+#define _spe_cacheline_num_(set, idx) \
+    ((((set) << SPE_CACHE_NWAY_SHIFT) + (idx)) << SPE_CACHELINE_SHIFT)
+
+/* Vector form of _spe_cacheline_num_: per element,
+ * ((set << way shift) + idx) << line shift.
+ */
+#define _spe_cacheline_num_x4(set, idx) \
+ spu_sl (spu_add (spu_sl (set, SPE_CACHE_NWAY_SHIFT), idx), SPE_CACHELINE_SHIFT)
+
+/* Directory flag helpers. The entry for way 'idx' of a set is stored at
+ * column SPE_CACHE_NWAY_MASK - idx, i.e. in reverse way order.
+ */
+
+/* Non-zero when the entry has SPE_CACHELINE_DIRTY set. */
+#define _spe_cacheline_is_dirty_(set, idx) \
+ (spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] & SPE_CACHELINE_DIRTY)
+
+/* Non-zero when the entry has SPE_CACHELINE_LOCKED set. */
+#define _spe_cacheline_is_locked_(set, idx) \
+ (spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] & SPE_CACHELINE_LOCKED)
+
+/* Set the LOCKED flag. Expands to a bare assignment expression. */
+#define _spe_lock_cacheline_(set, idx) \
+ spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] |= SPE_CACHELINE_LOCKED
+
+/* Clear the LOCKED flag. Expands to a bare assignment expression. */
+#define _spe_unlock_cacheline_(set, idx) \
+ spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] &= ~SPE_CACHELINE_LOCKED
+
+
+/**
+ * spe_cache_dir
+ * This is the n-way set associative cache
+ * directory. Entries are either zero (unused)
+ * or non-zero (used).
+ *
+ * Each row holds the entries of one set and is
+ * loaded as a single quadword by the lookup code.
+ * The entry for way 'idx' lives at column
+ * SPE_CACHE_NWAY_MASK - idx.
+ *
+ * State for one additional (dummy) set is
+ * allocated to improve efficiency of cache
+ * line locking.
+ * volatile seems not to be necessary here, the SCE toolchain guarantees a barrier after dma transfer
+ */
+static unsigned int spe_cache_dir[SPE_CACHE_NSETS+1][SPE_CACHE_NWAY]
+ __attribute__ ((aligned(16)));
+
+/**
+ * spe_cache_mem
+ * A contiguous set of cachelines in LS memory,
+ * one line for each entry in the cache.
+ * Indexed by the byte offset that
+ * _spe_cacheline_num_() computes.
+ * volatile seems not to be necessary here, the SCE toolchain guarantees a barrier after dma transfer
+ */
+static char spe_cache_mem[SPE_CACHE_MEM_SIZE]
+ __attribute__ ((aligned(128)));
+
+#include "nway-lookup.h"
+#include "nway-replace.h"
+#include "nway-miss.h"
+#include "nway-opt.h"
+
+#endif
diff --git a/Extras/software_cache/cache/include/spe_cache.h b/Extras/software_cache/cache/include/spe_cache.h
new file mode 100644
index 000000000..f632bf11b
--- /dev/null
+++ b/Extras/software_cache/cache/include/spe_cache.h
@@ -0,0 +1,32 @@
+/* --------------------------------------------------------------- */
+/* PLEASE DO NOT MODIFY THIS SECTION */
+/* This prolog section is automatically generated. */
+/* */
+/* (C) Copyright 2001,2006, */
+/* International Business Machines Corporation, */
+/* */
+/* All Rights Reserved. */
+/* --------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+/* spe_cache.h
+ *
+ * Copyright (C) 2005 IBM Corp.
+ *
+ * Top level include file implementing
+ * software managed cache.
+ */
+
+#ifndef __SPE_CACHE_H__
+#define __SPE_CACHE_H__ 1
+
+#include "vec_literal.h"
+#include "ilog2.h"
+#include "memset.h"
+//#include
+
+#include "defs.h"
+#include "dma.h"
+#include "nway.h"
+#include "api.h"
+
+#endif
diff --git a/Extras/software_cache/cache/include/vec_literal.h b/Extras/software_cache/cache/include/vec_literal.h
new file mode 100644
index 000000000..a7734708f
--- /dev/null
+++ b/Extras/software_cache/cache/include/vec_literal.h
@@ -0,0 +1,74 @@
+/* @(#)86 1.3 src/include/vec_literal.h, sw.includes, sdk_pub 10/11/05 16:00:27 */
+/* -------------------------------------------------------------- */
+/* (C) Copyright 2001,2005, */
+/* International Business Machines Corporation, */
+/* Sony Computer Entertainment Incorporated, */
+/* Toshiba Corporation. */
+/* */
+/* All Rights Reserved. */
+/* -------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+#ifndef _VEC_LITERAL_H_
+#define _VEC_LITERAL_H_
+
+/* This header file provides an abstraction for the various implementations
+ * of vector literal construction. The two formats are:
+ *
+ * 1) Altivec style, using parentheses
+ * 2) C grammar friendly style, using curly braces
+ *
+ * The macro VEC_LITERAL has been developed to provide some portability
+ * between these two styles. To achieve true portability, the user must
+ * specify all elements of the vector being initialized. A single element can
+ * be provided, but only the first element is guaranteed across both
+ * construction styles.
+ *
+ * The VEC_SPLAT_* macros have been provided for portability of vector literal
+ * construction when all the elements of the vector contain the same value.
+ */
+
+#ifdef __SPU__
+#include
+#endif
+
+
+#ifdef __ALTIVEC_LITERAL_STYLE__
+/* Use altivec style.
+ * Each VEC_SPLAT_* macro casts a single value to the vector type.
+ */
+#define VEC_LITERAL(_type, ...) ((_type)(__VA_ARGS__))
+
+#define VEC_SPLAT_U8(_val) ((vector unsigned char)(_val))
+#define VEC_SPLAT_S8(_val) ((vector signed char)(_val))
+
+#define VEC_SPLAT_U16(_val) ((vector unsigned short)(_val))
+#define VEC_SPLAT_S16(_val) ((vector signed short)(_val))
+
+#define VEC_SPLAT_U32(_val) ((vector unsigned int)(_val))
+#define VEC_SPLAT_S32(_val) ((vector signed int)(_val))
+#define VEC_SPLAT_F32(_val) ((vector float)(_val))
+
+#define VEC_SPLAT_U64(_val) ((vector unsigned long long)(_val))
+#define VEC_SPLAT_S64(_val) ((vector signed long long)(_val))
+#define VEC_SPLAT_F64(_val) ((vector double)(_val))
+
+#else
+/* Use curly brace style.
+ * Each VEC_SPLAT_* macro repeats _val once per element, so an argument
+ * with side effects is evaluated once per element. _val is not
+ * parenthesized in the expansion.
+ */
+#define VEC_LITERAL(_type, ...) ((_type){__VA_ARGS__})
+
+#define VEC_SPLAT_U8(_val) ((vector unsigned char){_val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val})
+#define VEC_SPLAT_S8(_val) ((vector signed char){_val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val})
+
+#define VEC_SPLAT_U16(_val) ((vector unsigned short){_val, _val, _val, _val, _val, _val, _val, _val})
+#define VEC_SPLAT_S16(_val) ((vector signed short){_val, _val, _val, _val, _val, _val, _val, _val})
+
+#define VEC_SPLAT_U32(_val) ((vector unsigned int){_val, _val, _val, _val})
+#define VEC_SPLAT_S32(_val) ((vector signed int){_val, _val, _val, _val})
+#define VEC_SPLAT_F32(_val) ((vector float){_val, _val, _val, _val})
+
+#define VEC_SPLAT_U64(_val) ((vector unsigned long long){_val, _val})
+#define VEC_SPLAT_S64(_val) ((vector signed long long){_val, _val})
+#define VEC_SPLAT_F64(_val) ((vector double){_val, _val})
+
+#endif
+
+#endif /* _VEC_LITERAL_H_ */
diff --git a/Extras/software_cache/hello.spu.c b/Extras/software_cache/hello.spu.c
new file mode 100644
index 000000000..3433c9f58
--- /dev/null
+++ b/Extras/software_cache/hello.spu.c
@@ -0,0 +1,83 @@
+/*
+ * SCE CONFIDENTIAL
+ * PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
+ * Copyright (C) 2005 Sony Computer Entertainment Inc.
+ * All Rights Reserved.
+ */
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+/* Cache configuration. These must be defined before spe_cache.h is
+ * included: 4 ways, 32 sets, 512-byte lines.
+ */
+#define SPE_CACHE_NWAY 4
+#define SPE_CACHE_NSETS 32
+#define SPE_CACHELINE_SIZE 512
+/* Override the per-set DMA tag: every set uses tag id 16. */
+#define SPE_CACHE_SET_TAGID(set) 16
+
+/* Build the sample with the software cache enabled. */
+#define USE_SOFTWARE_CACHE 1
+#ifdef USE_SOFTWARE_CACHE
+
+#include "cache/include/spe_cache.h"
+
+/* Return the local-store address that mirrors effective address 'ea'.
+ * If the line holding 'ea' is not cached yet, it is fetched and this
+ * function waits for the transfer before returning.
+ */
+void * spe_readcache(unsigned int ea)
+{
+    int set_no, way, line_off, byte_off;
+
+    _spe_cache_nway_lookup_(ea, set_no, way);
+
+    if (unlikely(way < 0)) {
+        /* Miss: start the fill, then block on the set's DMA tag. */
+        way = _spe_cache_miss_(ea, set_no, -1);
+        spu_writech(22, SPE_CACHE_SET_TAGMASK(set_no));
+        spu_mfcstat(MFC_TAG_UPDATE_ALL);
+    }
+
+    line_off = _spe_cacheline_num_(set_no, way);
+    byte_off = _spe_cacheline_byte_offset_(ea);
+    return (void *) &spe_cache_mem[line_off + byte_off];
+}
+#endif //USE_SOFTWARE_CACHE
+
+/* SPU entry point: copies a string from main memory, one byte at a time
+ * through the software cache, into a local buffer and prints it.
+ *
+ * spu_num    - not used by the code shown here
+ * mainmemPtr - effective address of the string in main memory
+ */
+int main(int spu_num,uint64_t mainmemPtr)
+{
+ /* Truncated copy of the address, used only for printing. */
+ int memPtr = (int) mainmemPtr;
+
+
+#define MAX_BUF 256
+ char spuBuffer[MAX_BUF];
+ spuBuffer[0] = 0;
+
+ char* result,*result2; //= spe_cache_rd(mainmemPtr);
+
+#ifdef USE_SOFTWARE_CACHE
+
+ //this is a brute-force sample.
+ //you can use the software cache more efficiently by using __spe_cache_rd_x4 to read 4 elements at a time
+
+ int i=0;
+ do
+ {
+ result = spe_readcache(mainmemPtr+i);
+ //spe_readcache is the expanded version of the spe_cache_rd macro
+
+ spuBuffer[i] = result[0];
+ i++;
+ /* NOTE(review): the next line is not valid C as it stands; the end of the
+ * loop condition and the start of the overflow check (and the #endif that
+ * closes USE_SOFTWARE_CACHE) appear to have been lost from this copy.
+ */
+ } while (result[0] && (i= MAX_BUF)
+ {
+ spu_printf("spe_readcache buffer overflow. is the buffer 0-terminated?\n");
+ }
+ spu_printf("spe_cache_rd(%x) = %s\n", memPtr,spuBuffer);
+
+ sys_spu_thread_exit(0);
+}
+
diff --git a/Extras/software_cache/hello.spu.mk b/Extras/software_cache/hello.spu.mk
new file mode 100644
index 000000000..0b1997241
--- /dev/null
+++ b/Extras/software_cache/hello.spu.mk
@@ -0,0 +1,19 @@
+# SCE CONFIDENTIAL
+# PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
+# Copyright (C) 2005 Sony Computer Entertainment Inc.
+# All Rights Reserved.
+#
+
+# Directory holding the SDK sample makefile fragments. Only assigned when
+# CELL_MK_DIR is not already set.
+CELL_MK_DIR ?= $(CELL_SDK)/samples/mk
+
+
+include $(CELL_MK_DIR)/sdk.makedef.mk
+
+# Build hello.spu.c into hello.spu.elf, with the software cache headers on
+# the include path.
+SPU_INCDIRS += -Icache/include
+SPU_SRCS = hello.spu.c
+SPU_TARGET = hello.spu.elf
+# Optimisation is switched off for this sample.
+SPU_OPTIMIZE_LV=-O0
+
+include $(CELL_MK_DIR)/sdk.target.mk
+
+
diff --git a/Extras/software_cache/spu_printf_server.h b/Extras/software_cache/spu_printf_server.h
new file mode 100644
index 000000000..e477eac50
--- /dev/null
+++ b/Extras/software_cache/spu_printf_server.h
@@ -0,0 +1,19 @@
+/* SCE CONFIDENTIAL */
+/* PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007 */
+/* Copyright (C) 2005 Sony Computer Entertainment Inc. */
+/* All Rights Reserved. */
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/* Thread entry of the SPU printf server; 'arg' is not used. */
+void spu_printf_server_entry(uint64_t arg);
+/* Create the event queue, server thread and terminating port.
+ * Returns 0 on success, -1 on failure.
+ */
+int spu_printf_server_initialize(void);
+/* Stop the server thread and release the port and the event queue.
+ * Returns 0 on success, -1 on failure.
+ */
+int spu_printf_server_finalize(void);
+/* Connect an SPU thread's printf user-event port to the server's queue.
+ * Returns 0 on success, -1 on failure.
+ */
+int spu_printf_server_register(sys_spu_thread_t spu);
+/* Disconnect an SPU thread's printf user-event port.
+ * Returns 0 on success, -1 on failure.
+ */
+int spu_printf_server_unregister(sys_spu_thread_t spu);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
diff --git a/Extras/software_cache/spu_printf_server.ppu.c b/Extras/software_cache/spu_printf_server.ppu.c
new file mode 100644
index 000000000..48dab3b0f
--- /dev/null
+++ b/Extras/software_cache/spu_printf_server.ppu.c
@@ -0,0 +1,204 @@
+/*
+ * SCE CONFIDENTIAL
+ * PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
+ * Copyright (C) 2005 Sony Computer Entertainment Inc.
+ * All Rights Reserved.
+ *
+ * The SPU printf server is a PPU thread which collaborates with an SPU to
+ * output strings.
+ *
+ * On SPU-side, spu_printf() places the output string and arguments on a stack
+ * in the local storage, and passes its local-storage address with an SPU
+ * thread user event from SPU port 1 to PPU. On PPU-side,
+ * spu_thread_sprintf fetches the stack in the local storage by DMA, and parses
+ * it into a formatted string.
+ *
+ * The SPU printf server takes charge of the tasks on PPU-side. The sequence
+ * of its tasks is as follows.
+ * 1. Receive an event by sys_event_queue_receive().
+ * 2. Pass the sending SPU thread and the received spu_printf stack address
+ *    to spu_thread_printf().
+ * 3. Write the result back to the SPU thread's mailbox by
+ *    sys_spu_thread_write_spu_mb(), and go back to step 1.
+ *
+ * Initialization of the SPU printf server and registration of SPU threads to
+ * the SPU printf server are required. These can be done by
+ * spu_printf_server_initialize() and spu_printf_server_register().
+ * What they actually do is to create a PPU thread and event queue, and
+ * connect the SPU thread to the event queue.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "spu_printf_server.h"
+
+/* Stack size and priority passed to sys_ppu_thread_create() for the server thread. */
+#define STACK_SIZE 4096
+#define PRIO 200
+
+/* Server state shared by the functions in this file. */
+static sys_ppu_thread_t thread; /* the server thread */
+static sys_event_queue_t equeue; /* queue the server receives events from */
+static sys_event_queue_attribute_t eattr; /* attributes used to create equeue */
+static sys_event_port_t terminating_port; /* port used to tell the server to exit */
+
+/* Name of the terminating port; the server compares event.source against it. */
+#define TERMINATING_PORT_NAME 0xFEE1DEAD
+/* SPU user-event port number used for printf events. */
+#define SPU_PORT_PRINTF 0x1
+
+/*
+ * Set up the SPU printf server: create the event queue, create the
+ * terminating port and connect it to the queue, then start the server
+ * thread.
+ *
+ * The thread is created last so that a failure in any earlier step can
+ * be undone without having to stop a running thread. On failure every
+ * resource created so far is released again.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int spu_printf_server_initialize()
+{
+    int ret;
+
+    /* Create event */
+    sys_event_queue_attribute_initialize(eattr);
+    ret = sys_event_queue_create(&equeue, &eattr, SYS_EVENT_PORT_LOCAL, 127);
+    if (ret) {
+        printf("sys_event_queue_create faild %d\n", ret);
+        return -1;
+    }
+
+    /*
+     * Create the terminating port. This port is used only in
+     * spu_printf_server_finalize().
+     */
+    ret = sys_event_port_create(&terminating_port,
+                                SYS_EVENT_PORT_LOCAL,
+                                TERMINATING_PORT_NAME);
+    if (ret) {
+        printf ("spu_printf_server_initialize: sys_event_port_create failed %d\n", ret);
+        goto fail_destroy_queue;
+    }
+
+    ret = sys_event_port_connect_local(terminating_port, equeue);
+    if (ret) {
+        printf ("spu_printf_server_initialize: sys_event_port_connect_local failed %d\n", ret);
+        goto fail_destroy_port;
+    }
+
+    /* Create PPU thread */
+    ret = sys_ppu_thread_create(&thread, spu_printf_server_entry, 0UL, PRIO,
+                                STACK_SIZE,
+                                SYS_PPU_THREAD_CREATE_JOINABLE,
+                                (char*)"spu_printf_server");
+    if (ret) {
+        printf ("spu_printf_server_initialize: sys_ppu_thread_create failed %d\n", ret);
+        goto fail_disconnect_port;
+    }
+
+    return 0;
+
+    /* Best-effort unwind; the original error has already been reported. */
+fail_disconnect_port:
+    (void)sys_event_port_disconnect(terminating_port);
+fail_destroy_port:
+    (void)sys_event_port_destroy(terminating_port);
+fail_destroy_queue:
+    (void)sys_event_queue_destroy(equeue, 0);
+    return -1;
+}
+
+
+/*
+ * Shut down the SPU printf server and release its resources.
+ *
+ * Before calling this, every registered SPU thread must have finished
+ * sending printf events.
+ *
+ * Returns 0 on success, -1 if the server could not be notified or joined,
+ * or if the event queue could not be destroyed. Failures to disconnect or
+ * destroy the terminating port are reported but do not stop the cleanup.
+ */
+int spu_printf_server_finalize()
+{
+    int ret;
+
+    /*
+     * Send an event from the terminating port to notify the termination to
+     * the SPU printf server
+     */
+    ret = sys_event_port_send(terminating_port, 0, 0, 0);
+    if (ret) {
+        printf("sys_event_port_send failed %d\n", ret);
+        return -1;
+    }
+
+    /* Wait for the termination of the SPU printf server */
+    uint64_t exit_status;
+    ret = sys_ppu_thread_join(thread, &exit_status);
+    if (ret) {
+        printf("sys_ppu_thread_join failed %d\n", ret);
+        return -1;
+    }
+
+    /* Disconnect and destroy the terminating port */
+    ret = sys_event_port_disconnect(terminating_port);
+    if (ret) {
+        printf("sys_event_port_disconnect failed %d\n", ret);
+    }
+    ret = sys_event_port_destroy(terminating_port);
+    if (ret) {
+        printf("sys_event_port_destroy failed %d\n", ret);
+    }
+
+    /* Destroy the event queue */
+    ret = sys_event_queue_destroy(equeue, 0);
+    if (ret) {
+        printf("sys_event_queue_destroy failed %d\n", ret);
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Connect the printf user-event port of SPU thread 'spu' to the server's
+ * event queue. Returns 0 on success, -1 on failure.
+ */
+int spu_printf_server_register(sys_spu_thread_t spu)
+{
+    const int status = sys_spu_thread_connect_event(spu, equeue,
+                                                    SYS_SPU_THREAD_EVENT_USER,
+                                                    SPU_PORT_PRINTF);
+
+    if (status == 0) {
+        return 0;
+    }
+
+    printf("sys_spu_thread_connect_event faild %d\n", status);
+    return -1;
+}
+
+
+/*
+ * Disconnect the printf user-event port of SPU thread 'spu'.
+ * Returns 0 on success, -1 on failure.
+ */
+int spu_printf_server_unregister(sys_spu_thread_t spu)
+{
+    const int status = sys_spu_thread_disconnect_event(spu,
+                                                       SYS_SPU_THREAD_EVENT_USER,
+                                                       SPU_PORT_PRINTF);
+
+    if (status == 0) {
+        return 0;
+    }
+
+    printf("sys_spu_thread_disconnect_event faild %d\n", status);
+    return -1;
+}
+
+
+/*
+ * Body of the SPU printf server thread.
+ *
+ * Receives events from the server queue until either a system call fails
+ * or an event arrives from the terminating port. For every other event
+ * the printf request is serviced and the result is written back to the
+ * sending SPU thread's mailbox.
+ */
+void spu_printf_server_entry(uint64_t arg)
+{
+    sys_event_t ev;
+    int rc;
+
+    (void)arg; /* This thread does not use the argument */
+
+    while (1) {
+        rc = sys_event_queue_receive(equeue, &ev, SYS_NO_TIMEOUT);
+        if (rc != 0) {
+            printf("sys_event_queue_receive failed %d\n", rc);
+            break;
+        }
+
+        /* An event from the terminating port asks the server to exit. */
+        if (ev.source == TERMINATING_PORT_NAME) {
+            printf("Finalize the SPU printf server.\n");
+            break;
+        }
+
+        /* data1 identifies the sending SPU thread, data3 is its printf argument. */
+        sys_spu_thread_t sender = ev.data1;
+        int printed = spu_thread_printf(sender, ev.data3);
+
+        rc = sys_spu_thread_write_spu_mb(sender, printed);
+        if (rc != 0) {
+            printf("sys_spu_thread_write_spu_mb failed %d\n", rc);
+            break;
+        }
+    }
+
+    sys_ppu_thread_exit(0);
+}
+
diff --git a/Extras/software_cache/spu_thr_printf.ppu.c b/Extras/software_cache/spu_thr_printf.ppu.c
new file mode 100644
index 000000000..38d2f4bb1
--- /dev/null
+++ b/Extras/software_cache/spu_thr_printf.ppu.c
@@ -0,0 +1,243 @@
+/*
+ * SCE CONFIDENTIAL
+ * PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
+ * Copyright (C) 2005 Sony Computer Entertainment Inc.
+ * All Rights Reserved.
+ *
+ * File: spu_thr_printf.c
+ * Description:
+ * This sample shows how SPU programs can output strings: the events sent
+ * by spu_printf() on SPU threads are handled by a PPU-side printf server.
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "spu_printf_server.h" /* SPU printf server */
+
+#define MAX_PHYSICAL_SPU 4
+#define MAX_RAW_SPU 0
+#define NUM_SPU_THREADS 4 /* The number of SPU threads in the group */
+#define PRIORITY 100
+#ifdef SN_TARGET_PS3
+#define SPU_PROG (SYS_APP_HOME "/SPU_printf.spu.self")
+#else
+#define SPU_PROG (SYS_APP_HOME "/hello.spu.self")
+#endif
+
+#define IN_BUF_SIZE 256
+#define OUT_BUF_SIZE 256
+
+volatile uint8_t in_buf[IN_BUF_SIZE];
+volatile uint8_t out_buf[OUT_BUF_SIZE];
+uint32_t in_size = IN_BUF_SIZE;
+uint32_t out_size = OUT_BUF_SIZE;
+
+/*
+ * Sample entry point: creates an SPU thread group, registers each SPU
+ * thread with the SPU printf server, runs the group to completion and
+ * reports its exit cause. Returns 0 once the group has been joined; calls
+ * exit() with the failing call's return value if setup or the join fails.
+ */
+int main(void)
+{
+    /* in_buf is volatile uint8_t[], not a char * that sprintf() accepts: */
+    /* copy the greeting, including its terminating '\0', byte by byte.   */
+    static const char greeting[] = "hello world";
+    for (size_t k = 0; k < sizeof(greeting); k++)
+        in_buf[k] = (uint8_t)greeting[k];
+
+    sys_spu_thread_group_t group; /* SPU thread group ID */
+    const char *group_name = "Group";
+    sys_spu_thread_group_attribute_t group_attr; /* SPU thread group attribute */
+    sys_spu_thread_t threads[NUM_SPU_THREADS]; /* SPU thread IDs */
+    sys_spu_thread_attribute_t thread_attr; /* SPU thread attribute */
+    const char *thread_names[NUM_SPU_THREADS] =
+        {"SPU Thread 0",
+         "SPU Thread 1",
+         "SPU Thread 2",
+         "SPU Thread 3"}; /* The names of SPU threads */
+    sys_spu_image_t spu_img;
+    int ret;
+
+    /* Initialize SPUs */
+    printf("Initializing SPUs\n");
+    ret = sys_spu_initialize(MAX_PHYSICAL_SPU, MAX_RAW_SPU);
+    if (ret != CELL_OK) {
+        fprintf(stderr, "sys_spu_initialize failed: %#.8x\n", ret);
+        exit(ret);
+    }
+
+    /* Create an SPU thread group */
+    printf("Creating an SPU thread group.\n");
+    group_attr.name = group_name;
+    group_attr.nsize = strlen(group_attr.name) + 1; /* Add 1 for '\0' */
+    group_attr.type = SYS_SPU_THREAD_GROUP_TYPE_NORMAL;
+    ret = sys_spu_thread_group_create(&group,
+                                      NUM_SPU_THREADS,
+                                      PRIORITY,
+                                      &group_attr);
+    if (ret != CELL_OK) {
+        fprintf(stderr, "sys_spu_thread_group_create failed: %#.8x\n", ret);
+        exit(ret);
+    }
+
+    ret = sys_spu_image_open(&spu_img, SPU_PROG);
+    if (ret != CELL_OK) {
+        fprintf(stderr, "sys_spu_image_open failed: %#.8x\n", ret);
+        exit(ret);
+    }
+
+    /*
+     * Initialize the SPU printf server
+     *
+     * What spu_printf_server_initialize() actually does is to create a
+     * PPU thread and an event queue which handle the events sent by
+     * spu_printf().
+     */
+    ret = spu_printf_server_initialize();
+    if (ret != SUCCEEDED) {
+        fprintf(stderr, "spu_printf_server_initialize failed: %#.8x\n", ret);
+        exit(ret);
+    }
+    /*
+     * In this loop, all SPU threads in the SPU thread group are initialized
+     * with the loaded SPU ELF image.
+     */
+    for (int i = 0; i < NUM_SPU_THREADS; i++) {
+        sys_spu_thread_argument_t thread_args;
+        int spu_num = i;
+
+        printf("Initializing SPU thread %d\n", i);
+
+        /*
+         * Name the thread; the program image itself is supplied through
+         * spu_img, which sys_spu_image_open() was given above.
+         */
+        thread_attr.name = thread_names[i];
+        thread_attr.nsize = strlen(thread_names[i]) + 1;
+        thread_attr.option = SYS_SPU_THREAD_OPTION_NONE;
+
+        /*
+         * Pass the SPU number (arg1) and the address of in_buf (arg2).
+         */
+        thread_args.arg1 = SYS_SPU_THREAD_ARGUMENT_LET_32(spu_num);
+        thread_args.arg2 = SYS_SPU_THREAD_ARGUMENT_LET_64((uint64_t)in_buf);
+
+        /*
+         * The third argument specifies the SPU number.
+         * The SPU number of each SPU thread must be unique within the SPU
+         * thread group.
+         */
+        ret = sys_spu_thread_initialize(&threads[i],
+                                        group,
+                                        spu_num,
+                                        &spu_img,
+                                        &thread_attr,
+                                        &thread_args);
+        if (ret != CELL_OK) {
+            fprintf(stderr, "sys_spu_thread_initialize failed: %#.8x\n", ret);
+            exit(ret);
+        }
+
+        /*
+         * Register the SPU thread to the SPU printf server.
+         *
+         * spu_printf_server_register() establishes the connection between
+         * the SPU thread and the SPU printf server's event queue.
+         */
+        ret = spu_printf_server_register(threads[i]);
+        if (ret != CELL_OK) {
+            fprintf(stderr, "spu_printf_server_register failed: %#.8x\n", ret);
+            exit(ret);
+        }
+    }
+
+    printf("All SPU threads have been successfully initialized.\n");
+
+    /*
+     * Start the SPU thread group
+     *
+     * The SPU thread group will be in the READY state, and will become in
+     * the RUNNING state when the kernel assigns and executes it onto SPUs.
+     */
+    printf("Starting the SPU thread group.\n");
+    ret = sys_spu_thread_group_start(group);
+    if (ret != CELL_OK) {
+        fprintf(stderr, "sys_spu_thread_group_start failed: %#.8x\n", ret);
+        exit(ret);
+    }
+
+    /*
+     * Wait for the termination of the SPU thread group.
+     */
+    printf("Waiting for the SPU thread group to be terminated.\n");
+    int cause, status;
+    ret = sys_spu_thread_group_join(group, &cause, &status);
+    if (ret != CELL_OK) {
+        fprintf(stderr, "sys_spu_thread_group_join failed: %#.8x\n", ret);
+        exit(ret);
+    }
+
+    /* Show the exit cause and status. */
+    switch(cause) {
+    case SYS_SPU_THREAD_GROUP_JOIN_GROUP_EXIT:
+        printf("The SPU thread group exited by sys_spu_thread_group_exit().\n");
+        printf("The group's exit status = %d\n", status);
+        break;
+    case SYS_SPU_THREAD_GROUP_JOIN_ALL_THREADS_EXIT:
+        printf("All SPU thread exited by sys_spu_thread_exit().\n");
+        for (int i = 0; i < NUM_SPU_THREADS; i++) {
+            int thr_exit_status;
+            ret = sys_spu_thread_get_exit_status(threads[i], &thr_exit_status);
+            if (ret != CELL_OK) {
+                fprintf(stderr, "sys_spu_thread_get_exit_status failed: %#.8x\n", ret);
+                continue; /* thr_exit_status may be unset; do not print it */
+            }
+            printf("SPU thread %d's exit status = %d\n", i, thr_exit_status);
+        }
+        break;
+    case SYS_SPU_THREAD_GROUP_JOIN_TERMINATED:
+        printf("The SPU thread group is terminated by sys_spu_thread_terminate().\n");
+        printf("The group's exit status = %d\n", status);
+        break;
+    default:
+        fprintf(stderr, "Unknown exit cause: %d\n", cause);
+        break;
+    }
+
+    /*
+     * Destroy the SPU thread group and clean up resources.
+     */
+    ret = sys_spu_thread_group_destroy(group);
+    if (ret != CELL_OK) {
+        fprintf(stderr, "sys_spu_thread_group_destroy failed: %#.8x\n", ret);
+    }
+
+    ret = sys_spu_image_close(&spu_img);
+    if (ret != CELL_OK) {
+        fprintf(stderr, "sys_spu_image_close failed: %.8x\n", ret);
+    }
+
+    /*
+     * Finalize the SPU printf server.
+     *
+     * This function lets the PPU thread exit.
+     * The event queue will be destroyed.
+     */
+    ret = spu_printf_server_finalize();
+    if (ret != CELL_OK) {
+        fprintf(stderr, "spu_printf_server_finalize failed: %#.8x\n", ret);
+    }
+
+    printf("Exiting.\n");
+    return 0;
+}
+
+
diff --git a/Extras/software_cache/spu_thr_printf.ppu.mk b/Extras/software_cache/spu_thr_printf.ppu.mk
new file mode 100644
index 000000000..9f7d0c477
--- /dev/null
+++ b/Extras/software_cache/spu_thr_printf.ppu.mk
@@ -0,0 +1,16 @@
+# SCE CONFIDENTIAL
+# PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
+# Copyright (C) 2005 Sony Computer Entertainment Inc.
+# All Rights Reserved.
+#
+
+CELL_MK_DIR ?= $(CELL_SDK)/samples/mk
+# CELL_MK_DIR may be overridden by the caller; the default above points into CELL_SDK.
+include $(CELL_MK_DIR)/sdk.makedef.mk
+# PPU program: its two C sources and the ELF file name for the result.
+PPU_SRCS = spu_thr_printf.ppu.c spu_printf_server.ppu.c
+PPU_TARGET = spu_thr_printf.ppu.elf
+# NOTE(review): sdk.target.mk presumably turns PPU_SRCS/PPU_TARGET into rules - not visible here.
+include $(CELL_MK_DIR)/sdk.target.mk
+
+
diff --git a/Extras/software_cache/spu_thr_printf_2.sln b/Extras/software_cache/spu_thr_printf_2.sln
new file mode 100644
index 000000000..890915779
--- /dev/null
+++ b/Extras/software_cache/spu_thr_printf_2.sln
@@ -0,0 +1,29 @@
+
+Microsoft Visual Studio Solution File, Format Version 9.00
+# Visual Studio 2005
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spu_thr_printf_2", "spu_thr_printf_2.vcproj", "{3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}"
+ ProjectSection(ProjectDependencies) = postProject
+ {47EE939D-CB3D-4600-B8B6-79FDF607E133} = {47EE939D-CB3D-4600-B8B6-79FDF607E133}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SPU_printf", "SPU_printf\SPU_printf.vcproj", "{47EE939D-CB3D-4600-B8B6-79FDF607E133}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ PS3 Debug|Win32 = PS3 Debug|Win32
+ PS3 Release|Win32 = PS3 Release|Win32
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Debug|Win32.ActiveCfg = PS3 Debug|Win32
+ {3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Debug|Win32.Build.0 = PS3 Debug|Win32
+ {3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Release|Win32.ActiveCfg = PS3 Release|Win32
+ {3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Release|Win32.Build.0 = PS3 Release|Win32
+ {47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Debug|Win32.ActiveCfg = PS3 Debug|Win32
+ {47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Debug|Win32.Build.0 = PS3 Debug|Win32
+ {47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Release|Win32.ActiveCfg = PS3 Release|Win32
+ {47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Release|Win32.Build.0 = PS3 Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/Extras/software_cache/spu_thr_printf_2.vcproj b/Extras/software_cache/spu_thr_printf_2.vcproj
new file mode 100644
index 000000000..a98cb1ca5
--- /dev/null
+++ b/Extras/software_cache/spu_thr_printf_2.vcproj
@@ -0,0 +1,200 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+