added updated version of simdmathlibrary-1.0.1

This commit is contained in:
ejcoumans
2007-07-27 18:53:58 +00:00
parent fddd6c5721
commit f360dd27d6
377 changed files with 9928 additions and 6136 deletions

View File

@@ -30,11 +30,12 @@
# All that you do to add a file is edit OBJS, the rest will just work
prefix = /usr
prefix_spu = $(prefix)/spu
DESTDIR =
OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o isinfd2.o \
is0denormf4.o is0denormd2.o recipd2.o divd2.o tand2.o sqrtf4.o absi4.o sqrtd2.o \
sinf4.o isgreaterd2.o sind2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \
sinf4.o isgreaterd2.o sind2.o sincosd2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \
rsqrtd2.o copysignf4.o remainderf4.o recipf4.o copysignd2.o log2f4.o \
negatef4.o negated2.o modff4.o asinf4.o frexpf4.o frexpd2.o ldexpf4.o cbrtf4.o \
cosd2.o cosf4.o hypotf4.o hypotd2.o ceilf4.o fmaf4.o fmaxf4.o fminf4.o floorf4.o \
@@ -51,7 +52,7 @@ OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o
fmodd2.o remainderd2.o
INCLUDES_SPU = -I../
INCLUDES_SPU = -I. -I../common
CROSS_SPU = spu-
AR_SPU = $(CROSS_SPU)ar
@@ -66,6 +67,7 @@ INSTALL = install
MAKE_DEFS = \
prefix='$(prefix)' \
prefix_spu='$(prefix_spu)' \
DESTDIR='$(DESTDIR)' \
LIB_BASE='$(LIB_BASE)' \
LIB_NAME='$(LIB_NAME)' \
@@ -89,43 +91,28 @@ $(STATIC_LIB): $(OBJS)
$(RANLIB_SPU) $@
install: $(STATIC_LIB)
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/include
$(INSTALL) -m 644 ../simdmath.h $(DESTDIR)$(prefix)/spu/include/
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/lib
$(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix)/spu/lib/$(STATIC_LIB)
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/include
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/include/simdmath
$(INSTALL) -m 644 simdmath/*.h $(DESTDIR)$(prefix_spu)/include/simdmath/
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/lib
$(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix_spu)/lib/$(STATIC_LIB)
clean:
cd tests; $(MAKE) $(MAKE_DEFS) clean
rm -f $(OBJS)
rm -f $(STATIC_LIB)
$(OBJS): ../simdmath.h
$(OBJS): ../common/simdmath.h
check: $(STATIC_LIB)
cd tests; $(MAKE) $(MAKE_DEFS); $(MAKE) $(MAKE_DEFS) check
# Some Objects have special header files.
sinf4.o sind2.o sincosf4.o cosd2.o: sincos_c.h
lldivu2.o lldivi2.o : lldiv.h
sinf4.o sind2.o sincosf4.o cosd2.o: ../common/simdmath/_sincos.h
lldivu2.o lldivi2.o : simdmath/_lldiv.h
%.o: %.c
%.o: ../common/%.c simdmath/%.h
$(CC_SPU) $(CFLAGS_SPU) -c $<
#----------
# C++
#----------
%.o: %.C
$(CXX_SPU) $(CFLAGS_SPU) -c $<
%.o: %.cpp
$(CXX_SPU) $(CFLAGS_SPU) -c $<
%.o: %.cc
$(CXX_SPU) $(CFLAGS_SPU) -c $<
%.o: %.cxx
$(CXX_SPU) $(CFLAGS_SPU) -c $<

View File

@@ -1,127 +0,0 @@
/* cosd2 - Computes the cosine of each of two double slots.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "sincos_c.h"
vector double
cosd2 (vector double x)
{
vec_double2 xl,xl2,xl3,res;
vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_double2 tiny = (vec_double2)spu_splats(0x3e40000000000000ull);
// Range reduction using : xl = angle * TwoOverPi;
//
xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744));
// Find the quadrant the angle falls in
// using: q = (int) (ceil(abs(x))*sign(x))
//
xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull)));
vec_float4 xf = spu_roundtf(xl);
vec_int4 q = spu_convts(xf,0);
q = spu_shuffle(q,q,copyEven);
// Compute an offset based on the quadrant that the angle falls in
//
vec_int4 offset = spu_add(spu_splats(1), spu_and(q,spu_splats(0x3)));
// Remainder in range [-pi/4..pi/4]
//
vec_float4 qf = spu_convtf(q,0);
vec_double2 qd = spu_extend(qf);
vec_double2 p1 = spu_nmsub(qd,spu_splats(_SINCOS_KC1D),x);
xl = spu_nmsub(qd,spu_splats(_SINCOS_KC2D),p1);
// Check if |xl| is a really small number
//
vec_double2 absXl = (vec_double2)spu_andc((vec_ullong2)xl, spu_splats(0x8000000000000000ull));
vec_ullong2 isTiny = (vec_ullong2)isgreaterd2(tiny,absXl);
// Compute x^2 and x^3
//
xl2 = spu_mul(xl,xl);
xl3 = spu_mul(xl2,xl);
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0 + xl2 * (((((c0 * xl2 + c1) * xl2 + c2) * xl2 + c3) * xl2 + c4) * xl2 + c5), and
// sx = xl + xl3 * (((((s0 * xl2 + s1) * xl2 + s2) * xl2 + s3) * xl2 + s4) * xl2 + s5)
//
vec_double2 ct0 = spu_mul(xl2,xl2);
vec_double2 ct1 = spu_madd(spu_splats(_SINCOS_CC0D),xl2,spu_splats(_SINCOS_CC1D));
vec_double2 ct2 = spu_madd(spu_splats(_SINCOS_CC2D),xl2,spu_splats(_SINCOS_CC3D));
vec_double2 ct3 = spu_madd(spu_splats(_SINCOS_CC4D),xl2,spu_splats(_SINCOS_CC5D));
vec_double2 st1 = spu_madd(spu_splats(_SINCOS_SC0D),xl2,spu_splats(_SINCOS_SC1D));
vec_double2 st2 = spu_madd(spu_splats(_SINCOS_SC2D),xl2,spu_splats(_SINCOS_SC3D));
vec_double2 st3 = spu_madd(spu_splats(_SINCOS_SC4D),xl2,spu_splats(_SINCOS_SC5D));
vec_double2 ct4 = spu_madd(ct2,ct0,ct3);
vec_double2 st4 = spu_madd(st2,ct0,st3);
vec_double2 ct5 = spu_mul(ct0,ct0);
vec_double2 ct6 = spu_madd(ct5,ct1,ct4);
vec_double2 st6 = spu_madd(ct5,st1,st4);
vec_double2 cx = spu_madd(ct6,xl2,spu_splats(1.0));
vec_double2 sx = spu_madd(st6,xl3,xl);
// Small angle approximation: sin(tiny) = tiny, cos(tiny) = 1.0
//
sx = spu_sel(sx,xl,isTiny);
cx = spu_sel(cx,spu_splats(1.0),isTiny);
// Use the cosine when the offset is odd and the sin
// when the offset is even
//
vec_ullong2 mask1 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
res = spu_sel(cx,sx,mask1);
// Flip the sign of the result when (offset mod 4) = 2 or 3, i.e. when the quadrant (q mod 4) is 1 or 2
//
vec_ullong2 mask2 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
mask2 = spu_shuffle(mask2,mask2,copyEven);
res = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)res),res,mask2);
// if input = +/-Inf return NAN
//
res = spu_sel(res, nan, isnand2 (x));
// if input = 0 or denorm, return 1.0
//
vec_ullong2 zeroMask = is0denormd2 (x);
res = spu_sel(res,spu_splats(1.0),zeroMask);
return res;
}
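The quadrant scheme above can be restated in scalar C. The sketch below is illustrative only: it calls the standard-library sin/cos where the SPU code evaluates minimax polynomials, uses an assumed two-term split of pi/2 in place of _SINCOS_KC1D/_SINCOS_KC2D, and omits the tiny-angle, NaN, and zero/denorm special cases handled above.
#include <math.h>
static double cos_quadrant_sketch(double x)
{
    /* q = round-half-away-from-zero(x * 2/pi), the quadrant index */
    double xl = x * 0.63661977236758134307553505349005744;
    double q  = trunc(xl + copysign(0.5, xl));
    /* remainder in [-pi/4, pi/4], subtracted Cody-Waite style; the header's
       split of pi/2 may differ from the two constants assumed here */
    double r  = (x - q * 1.5707963267948966) - q * 6.123233995736766e-17;
    int offset = ((int)q + 1) & 3;
    /* odd offset -> cosine kernel, even offset -> sine kernel */
    double p = (offset & 1) ? cos(r) : sin(r);
    /* negate in quadrants 1 and 2, i.e. when (offset & 2) is set */
    return (offset & 2) ? -p : p;
}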

View File

@@ -1,94 +0,0 @@
/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "sincos_c.h"
vector float
cosf4 (vector float x)
{
vec_float4 xl,xl2,xl3,res;
vec_int4 q;
// Range reduction using : xl = angle * TwoOverPi;
//
xl = spu_mul(x, spu_splats(0.63661977236f));
// Find the quadrant the angle falls in
// using: q = (int) (ceil(abs(xl))*sign(xl))
//
xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000)));
q = spu_convts(xl,0);
// Compute an offset based on the quadrant that the angle falls in
//
vec_int4 offset = spu_add(spu_splats(1),spu_and(q,spu_splats((int)0x3)));
// Remainder in range [-pi/4..pi/4]
//
vec_float4 qf = spu_convtf(q,0);
vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x);
xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1);
// Compute x^2 and x^3
//
xl2 = spu_mul(xl,xl);
xl3 = spu_mul(xl2,xl);
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
// sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
//
vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1));
vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1));
vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2));
vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2));
vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f));
vec_float4 sx = spu_madd(st2,xl3,xl);
// Use the cosine when the offset is odd and the sin
// when the offset is even
//
vec_uchar16 mask1 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
res = spu_sel(cx,sx,mask1);
// Flip the sign of the result when (offset mod 4) = 2 or 3, i.e. when the quadrant (q mod 4) is 1 or 2
//
vec_uchar16 mask2 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
res = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)res),res,mask2);
return res;
}
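The single-precision version uses the same reduction with three-term kernels; a scalar sketch of that Horner evaluation follows, with hypothetical arrays cc[] and sc[] standing in for the _SINCOS_CC0..CC2 and _SINCOS_SC0..SC2 constants from sincos_c.h.
/* Evaluate the cos and sin kernels on the reduced argument r, exactly as
   described in the comment above. */
static void sincos_poly_sketch(float r, const float cc[3], const float sc[3],
                               float *c, float *s)
{
    float r2 = r * r;   /* xl2 */
    float r3 = r2 * r;  /* xl3 */
    *c = 1.0f + r2 * ((cc[0] * r2 + cc[1]) * r2 + cc[2]);
    *s = r    + r3 * ((sc[0] * r2 + sc[1]) * r2 + sc[2]);
}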

View File

@@ -1,109 +0,0 @@
/* divi4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
divi4_t divi4 (vector signed int numer, vector signed int denom)
{
divi4_t res;
vec_int4 quot, newQuot, shift;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 numerAbs, denomAbs;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
// Determine whether result needs sign change
numerPos = spu_cmpgt( numer, -1 );
denomPos = spu_cmpgt( denom, -1 );
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos );
denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos );
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
denomZeros = spu_cntlz( denomAbs );
numerZeros = spu_cntlz( numerAbs );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = spu_sl( denomAbs, (vec_uint4)shift );
oneShifted = spu_sl( (vec_uint4)spu_splats(1), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, (vec_uint4)spu_splats(0), spu_cmpeq( denom, 0 ) );
// Shift left all leading zeros.
denomLeft = spu_sl( denomAbs, denomZeros );
oneLeft = spu_sl( (vec_uint4)spu_splats(1), denomZeros );
quot = spu_splats(0);
do
{
cont = spu_cmpgt( oneShifted, 0U );
anyCont = spu_extract( spu_gather( cont ), 0 );
newQuot = spu_or( quot, (vec_int4)oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = spu_cmpgt( denomShifted, numerAbs );
newNum = spu_sub( numerAbs, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip );
shift = (vec_int4)spu_sub( skip, numerZeros );
oneShifted = spu_rlmask( oneLeft, shift );
denomShifted = spu_rlmask( denomLeft, shift );
quot = spu_sel( newQuot, quot, skip );
numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = spu_sel( quot, spu_sub( 0, quot ), quotNeg );
res.rem = spu_sel( spu_sub( 0, (vec_int4)numerAbs ), (vec_int4)numerAbs, numerPos );
return res;
}
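The loop above is a restoring shift-subtract divider run across four lanes at once; per element the same algorithm looks like this scalar sketch (GCC's __builtin_clz stands in for spu_cntlz).
static void divi_sketch(int numer, int denom, int *quot, int *rem)
{
    unsigned n = numer < 0 ? 0u - (unsigned)numer : (unsigned)numer;
    unsigned d = denom < 0 ? 0u - (unsigned)denom : (unsigned)denom;
    unsigned q = 0;
    if (d != 0 && d <= n) {
        /* align the divisor's leading one with the dividend's */
        int shift = __builtin_clz(d) - __builtin_clz(n);
        for (; shift >= 0; --shift) {
            if ((d << shift) <= n) {   /* subtract whenever it fits */
                n -= d << shift;
                q |= 1u << shift;
            }
        }
    }
    *quot = ((numer < 0) != (denom < 0)) ? -(int)q : (int)q;
    *rem  = numer < 0 ? -(int)n : (int)n;   /* divide by zero: quot = 0, rem = numer */
}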

View File

@@ -1,94 +0,0 @@
/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_NAN
#define FP_NAN (0)
#endif
#ifndef FP_INFINITE
#define FP_INFINITE (1)
#endif
#ifndef FP_ZERO
#define FP_ZERO (2)
#endif
#ifndef FP_SUBNORMAL
#define FP_SUBNORMAL (3)
#endif
#ifndef FP_NORMAL
#define FP_NORMAL (4)
#endif
vector signed long long
fpclassifyd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 mask;
vec_llong2 classtype;
vec_uint4 cmpgt, cmpeq;
//FP_NORMAL: normal unless nan, infinity, zero, or denorm
classtype = spu_splats((long long)FP_NORMAL);
//FP_NAN: all-ones exponent and non-zero mantissa
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask );
//FP_INFINITE: all-ones exponent and zero mantissa
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask );
//FP_ZERO: zero exponent and zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero );
cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask );
return classtype;
}
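Per element, the classification reduces to two bit tests on the exponent and mantissa fields; a scalar sketch:
#include <math.h>
#include <stdint.h>
#include <string.h>
static int fpclassify_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    uint64_t expfield = bits & 0x7ff0000000000000ull;
    uint64_t mantissa = bits & 0x000fffffffffffffull;
    if (expfield == 0x7ff0000000000000ull)        /* all-ones exponent */
        return mantissa ? FP_NAN : FP_INFINITE;
    if (expfield == 0)                            /* zero exponent */
        return mantissa ? FP_SUBNORMAL : FP_ZERO;
    return FP_NORMAL;
}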

View File

@@ -1,95 +0,0 @@
/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef DBL_NAN
#define DBL_NAN ((long long)0x7FF8000000000000ull)
#endif
vector double
frexpd2 (vector double x, vector signed long long *pexp)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_ullong2 e0, x0, x1;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, sh, ex;
vec_double2 fr, frac = (vec_double2)zero;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) );
isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even );
frac = spu_sel( frac, (vec_double2)spu_splats((long long)DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
frac = spu_sel( frac, x , isinf );
//x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
frac = spu_sel( frac, (vec_double2)zero , iszero );
*pexp = spu_sel( *pexp, (vec_llong2)zero , iszero );
//Integer Exponent: if x is normal or subnormal
//...shift left to normalize fraction, zero shift if normal
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) );
sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) );
x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) );
x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) );
fr = (vec_double2)spu_sel( x1, x0, maskdw );
fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn );
fr = spu_sel( fr, x, sign );
e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 );
ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) );
frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) );
*pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) );
return frac;
}
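Per element the code rebuilds the fraction by forcing the exponent field to 0x3FE (so the magnitude lies in [0.5, 1)) and reports the exponent relative to that, normalizing subnormals first. A scalar sketch, which unlike the vector code scales subnormals with an exact multiply and writes *e = 0 for NaN, infinity and zero:
#include <stdint.h>
#include <string.h>
static double frexp_sketch(double x, int *e)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    int expfield = (int)((bits >> 52) & 0x7ff);
    if (expfield == 0x7ff) { *e = 0; return x; }             /* NaN or infinity */
    if ((bits & ~(1ull << 63)) == 0) { *e = 0; return x; }   /* +0 or -0 */
    int adjust = 0;
    if (expfield == 0) {                 /* subnormal: make the implicit bit real */
        x *= 0x1p54;                     /* exact scaling by 2^54 */
        memcpy(&bits, &x, sizeof bits);
        expfield = (int)((bits >> 52) & 0x7ff);
        adjust = 54;
    }
    *e = expfield - 1022 - adjust;
    bits = (bits & ~0x7ff0000000000000ull) | 0x3fe0000000000000ull;
    memcpy(&x, &bits, sizeof bits);      /* fraction in [0.5, 1) with x's sign */
    return x;
}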

View File

@@ -1,84 +0,0 @@
/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_ILOGB0
#define FP_ILOGB0 ((int)0x80000001)
#endif
#ifndef FP_ILOGBNAN
#define FP_ILOGBNAN ((int)0x7FFFFFFF)
#endif
vector signed long long
ilogbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, iszeroinf;
vec_llong2 ilogb = (vec_llong2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan );
//FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ),
spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 );
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) );
return ilogb;
}
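Per element this is the exponent field minus the bias, with subnormals normalized by their leading-zero count; zero and infinity both map to FP_ILOGB0 here, NaN to FP_ILOGBNAN. A scalar sketch:
#include <math.h>
#include <stdint.h>
#include <string.h>
static int ilogb_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    uint64_t mag = bits & ~(1ull << 63);
    int expfield = (int)(mag >> 52);
    uint64_t mantissa = mag & 0x000fffffffffffffull;
    if (expfield == 0x7ff) return mantissa ? FP_ILOGBNAN : FP_ILOGB0;
    if (mag == 0) return FP_ILOGB0;
    if (expfield != 0) return expfield - 1023;   /* normal: unbiased exponent */
    int shifts = 0;                              /* subnormal: normalize first */
    while (!(mantissa & (1ull << 52))) { mantissa <<= 1; ++shifts; }
    return -1022 - shifts;
}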

View File

@@ -1,123 +0,0 @@
/* Common functions for lldivi2/lldivu2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LLDIV_H__
#define __LLDIV_H__
#include <spu_intrinsics.h>
static inline vector unsigned long long ll_spu_cntlz(vector unsigned long long x);
static inline vector unsigned long long ll_spu_sl(vector unsigned long long x, vector unsigned long long count);
static inline vector unsigned long long ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count);
static inline vector unsigned long long ll_spu_cmpeq_zero(vector unsigned long long x);
static inline vector unsigned long long ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y);
static inline vector unsigned long long ll_spu_sub(vector unsigned long long x, vector unsigned long long y);
static inline vector unsigned long long
ll_spu_cntlz(vector unsigned long long x)
{
vec_uint4 cnt;
cnt = spu_cntlz((vec_uint4)x);
cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4)));
cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11}));
return (vec_ullong2)cnt;
}
static inline vector unsigned long long
ll_spu_sl(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
// shift upper word
x_upper = spu_and(x, mask);
x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1));
x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3));
x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
vec_uint4 cnt_byte;
cnt_byte = spu_add((vec_uint4)count, 7);
// shift upper word
x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1));
x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_andc(x, mask);
x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3));
x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
ll_spu_cmpeq_zero(vector unsigned long long x)
{
vec_uint4 cmp;
cmp = spu_cmpeq((vec_uint4)x, 0);
return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
static inline vector unsigned long long
ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 gt;
gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y);
gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y));
return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vector unsigned long long
ll_spu_sub(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 borrow;
borrow = spu_genb((vec_uint4)x, (vec_uint4)y);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow);
}
#endif // __LLDIV_H__
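These helpers build 64-bit operations out of the SPU's 32-bit primitives. The two less obvious compositions, a 64-bit greater-than and a 64-bit subtract with a borrow across the word boundary, look like this in scalar form (hypothetical high/low word arguments):
#include <stdint.h>
/* cf. ll_spu_cmpgt: the low-word compare only decides when the high words are equal */
static int u64_gt_sketch(uint32_t xh, uint32_t xl, uint32_t yh, uint32_t yl)
{
    return (xh > yh) || (xh == yh && xl > yl);
}
/* cf. ll_spu_sub: spu_genb/spu_subx carry the low-word borrow into the high word */
static void u64_sub_sketch(uint32_t xh, uint32_t xl, uint32_t yh, uint32_t yl,
                           uint32_t *rh, uint32_t *rl)
{
    uint32_t borrow = xl < yl;
    *rl = xl - yl;
    *rh = xh - yh - borrow;
}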

View File

@@ -1,128 +0,0 @@
/* lldivi2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "lldiv.h"
static inline vector signed long long _negatell2 (vector signed long long x);
static inline vector signed long long
_negatell2 (vector signed long long x)
{
vector signed int zero = (vector signed int){0,0,0,0};
vector signed int borrow;
borrow = spu_genb(zero, (vec_int4)x);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}
// lldivi2 - for each of two signed long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
lldivi2_t lldivi2 (vector signed long long numer, vector signed long long denom)
{
lldivi2_t res;
vec_ullong2 numerAbs, denomAbs;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 denomZeros, numerZeros;
vec_int4 shift;
vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
vec_ullong2 quot, newQuot;
vec_ullong2 newNum, skip, cont;
int anyCont;
// Determine whether result needs sign change
numerPos = spu_cmpgt((vec_int4)numer, -1);
numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
denomPos = spu_cmpgt((vec_int4)denom, -1);
denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_ullong2)spu_sel(_negatell2(numer), numer, (vec_ullong2)numerPos);
denomAbs = (vec_ullong2)spu_sel(_negatell2(denom), denom, (vec_ullong2)denomPos);
// Get difference of leading zeros.
denomZeros = (vec_uint4)ll_spu_cntlz( denomAbs );
numerZeros = (vec_uint4)ll_spu_cntlz( numerAbs );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = ll_spu_sl( denomAbs, (vec_ullong2)shift );
oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denomAbs ) );
// Shift left all leading zeros.
denomLeft = ll_spu_sl( denomAbs, (vec_ullong2)denomZeros );
oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
quot = spu_splats(0ull);
do
{
cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );
newQuot = spu_or( quot, oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = ll_spu_cmpgt( denomShifted, numerAbs );
newNum = ll_spu_sub( numerAbs, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );
oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
quot = spu_sel( newQuot, quot, skip );
numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = spu_sel((vec_llong2)quot, _negatell2((vec_llong2)quot), (vec_ullong2)quotNeg);
res.rem = spu_sel(_negatell2((vec_llong2)numerAbs), (vec_llong2)numerAbs, (vec_ullong2)numerPos);
return res;
}
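_negatell2 above negates a 64-bit value one 32-bit word at a time; the borrow handling it implements with spu_genb/spu_subx is, in scalar terms (hypothetical high/low word representation):
#include <stdint.h>
/* Negate the low word, then subtract an extra one from the negated high
   word whenever the low word was non-zero. */
static void negate64_sketch(uint32_t hi, uint32_t lo, uint32_t *nhi, uint32_t *nlo)
{
    *nlo = 0u - lo;
    *nhi = 0u - hi - (lo != 0);
}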

View File

@@ -1,93 +0,0 @@
/* logbd2 - for each element of vector x, return the exponent of normalized double x' as a floating-point value
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef HUGE_VALL
#define HUGE_VALL __builtin_huge_vall ()
#endif
#ifndef DBL_INF
#define DBL_INF ((long long)0x7FF0000000000000ull)
#endif
#ifndef DBL_NAN
#define DBL_NAN ((long long)0x7FF8000000000000ull)
#endif
vector double
logbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_double2 logb = (vec_double2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_INF), isinf );
//HUGE_VAL: x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)-HUGE_VALL), iszero );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20);
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ),
spu_nor( isnan, spu_or( isinf, iszero ) ) );
return logb;
}
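A scalar restatement of the cases selected above, written with the standard <math.h> helpers instead of bit tests: NaN passes through, infinity yields +Inf, zero yields -HUGE_VAL, and everything else the exponent of the normalized value.
#include <math.h>
static double logb_sketch(double x)
{
    if (isnan(x)) return x;            /* NaN in, NaN out */
    if (isinf(x)) return INFINITY;     /* all-ones exponent, zero mantissa */
    if (x == 0.0) return -HUGE_VAL;    /* the -HUGE_VALL selection above */
    return (double)ilogb(x);           /* unbiased exponent, subnormals normalized */
}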

View File

@@ -1,92 +0,0 @@
/* nextafterd2 - find the next representable floating-point value toward the second parameter.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
nextafterd2 (vector double xx, vector double yy)
{
vec_uint4 abs_x, abs_y, sign_x, abs_dif;
vec_uint4 is_sub, is_zerox, is_zeroy;
vec_uint4 is_equal, is_infy, is_nany;
vec_uint4 res0, res1, res;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
vec_uint4 vec_one = ((vec_uint4){0,1,0,1});
vec_uint4 vec_m1 = ((vec_uint4){0x80000000,1,0x80000000,1});
vec_uint4 msk_exp = ((vec_uint4){0x7FF00000,0,0x7FF00000,0});
vec_uint4 msk_abs = ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1});
vec_uchar16 msk_all_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11});
// mask sign bit
abs_x = spu_and( (vec_uint4)xx, msk_abs);
abs_y = spu_and( (vec_uint4)yy, msk_abs);
is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox,msk_all_eq));
// -0 exception
sign_x = spu_and((vec_uint4)xx, ((vec_uint4){0x80000000,0,0x80000000,0}));
sign_x = spu_sel(sign_x, vec_zero, is_zerox);
// if same sign |y| < |x| -> decrease
abs_dif = spu_subx(abs_y, abs_x, spu_rlqwbyte(spu_genb(abs_y, abs_x), 4));
is_sub = spu_xor((vec_uint4)yy, sign_x); // not same sign -> decrease
is_sub = spu_or(is_sub, abs_dif);
is_sub = spu_rlmaska(is_sub, -31);
is_sub = spu_shuffle(is_sub,is_sub,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
res0 = spu_addx( abs_x, vec_one, spu_rlqwbyte(spu_genc(abs_x,vec_one),4)); // calc increase
res1 = spu_subx( abs_x, vec_one, spu_rlqwbyte(spu_genb(abs_x,vec_one),4)); // calc decrease
res = spu_sel( res0, res1, is_sub); // select increase or decrease
res = spu_or( res, sign_x); // set sign
// check exception
// 0 -> -1
res = spu_sel(res, vec_m1, spu_and(is_zerox, is_sub));
// check equal (include 0,-0)
is_zeroy = spu_cmpeq( abs_y, vec_zero);
is_zeroy = spu_and( is_zeroy, spu_shuffle(is_zeroy,is_zeroy,msk_all_eq));
is_equal = spu_cmpeq((vec_uint4)xx, (vec_uint4)yy);
is_equal = spu_and(is_equal, spu_shuffle(is_equal,is_equal,msk_all_eq));
is_equal = spu_or(is_equal, spu_and(is_zeroy, is_zerox));
res = spu_sel(res, (vec_uint4)yy, is_equal);
// check nan
is_infy = spu_cmpeq( abs_y, msk_exp);
is_infy = spu_and( is_infy, spu_shuffle(is_infy,is_infy,msk_all_eq));
is_nany = spu_and( abs_y, msk_exp);
is_nany = spu_cmpeq( is_nany, msk_exp);
is_nany = spu_and( is_nany, spu_shuffle(is_nany,is_nany,msk_all_eq));
is_nany = spu_sel( is_nany, vec_zero, is_infy);
res = spu_sel(res, (vec_uint4)yy, is_nany);
return (vec_double2)res;
}
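The increment/decrement above works because, within one sign, IEEE doubles order the same way as their bit patterns, so stepping toward y is a one-unit change to the magnitude bits. A scalar sketch, which like the vector code leaves a NaN x and overflow past infinity to the raw bit arithmetic:
#include <math.h>
#include <stdint.h>
#include <string.h>
static double nextafter_sketch(double x, double y)
{
    if (isnan(y)) return y;                  /* NaN direction propagates */
    if (x == y)  return y;                   /* equal, including 0 vs -0 */
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    if (x == 0.0) {                          /* step off zero: smallest subnormal */
        bits = 1;                            /* magnitude one ... */
        if (y < 0.0) bits |= 1ull << 63;     /* ... with the direction's sign */
    } else if ((x < y) == (x > 0.0)) {
        bits += 1;                           /* move away from zero */
    } else {
        bits -= 1;                           /* move toward zero */
    }
    memcpy(&x, &bits, sizeof bits);
    return x;
}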

View File

@@ -1,72 +0,0 @@
/* powf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
powf4 (vector float x, vector float y)
{
vec_int4 zeros = spu_splats((int)0);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 absy = spu_andc(y, sbit);
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001));
negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros));
vec_float4 res = exp2f4(spu_mul(y, log2f4(absx)));
res = spu_sel(res, spu_or(sbit, res), negmask);
return spu_sel(res, (vec_float4)zeros, zeromask);
}
/*
{
vec_int4 zeros = spu_splats(0);
vec_int4 ones = (vec_int4)spu_splats((char)0xFF);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x);
vec_uchar16 onemask = (vec_uchar16)spu_cmpeq((vec_float4)ones , y);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 absy = spu_andc(y, sbit);
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001));
negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros));
}
*/
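In scalar terms the strategy above is |x|^y = exp2(y * log2|x|), with the sign restored when x is negative and y truncates to an odd integer, and a forced zero for x == 0. A sketch (plain C conversion where the vector code uses the saturating spu_convtu):
#include <math.h>
static float pow_sketch(float x, float y)
{
    if (x == 0.0f) return 0.0f;              /* zeromask path above */
    float r = exp2f(y * log2f(fabsf(x)));
    unsigned y_trunc = (unsigned)fabsf(y);   /* truncated magnitude of y */
    if (x < 0.0f && (y_trunc & 1u)) r = -r;  /* odd integer exponent keeps x's sign */
    return r;
}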

View File

@@ -1,313 +0,0 @@
/* A vector double is returned that contains the remainder xi REM yi,
for the corresponding elements of vector double x and vector double y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _twice(vec_uint4 aa);
vector double
remainderd2(vector double x, vector double yy)
{
vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 y_hi;
vec_uint4 abs_x, abs_yy, abs_2x, abs_2y;
vec_uint4 bias;
vec_uint4 nan_out, overflow;
vec_uint4 result;
vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
vec_uint4 vec_zero = spu_splats((unsigned int)0);
vec_uint4 is_zeroy;
// cut sign
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_yy = spu_andc((vec_uint4)yy, sign_mask);
y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);
// check nan out
is_zeroy = spu_cmpeq(abs_yy, vec_zero);
is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000
nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000
nan_out = spu_or(nan_out, is_zeroy); // y = 0
nan_out = spu_shuffle(nan_out, nan_out, splat_hi);
// make y x2
abs_2y = _twice(abs_yy); // 2 x y
/*
* use fmodd2 function
*/
// get remainder of y x2
// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_2y);
{
vec_double2 y = (vec_double2)abs_2y;
int shiftx0, shiftx1, shifty0, shifty1;
vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192});
// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 n, shift;
vec_uint4 exp_x, exp_y;
// , sign;
// vec_uint4 abs_x, abs_y;
vec_uint4 abs_y;
vec_uint4 mant_x, mant_x0, mant_x1;
vec_uint4 mant_y, mant_y0, mant_y1;
vec_uint4 mant_0, mant_1;
vec_uint4 mant_r, mant_l;
// vec_uint4 result;
vec_uint4 result0, resultx;
vec_uint4 zero_x, zero_y;
vec_uint4 denorm_x, denorm_y;
vec_uint4 cnt, cnt_x, cnt_y;
vec_uint4 shift_x, shift_y;
vec_uint4 adj_x, adj_y;
vec_uint4 z, borrow, mask;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1});
// vec_uint4 vec_zero = spu_splats((unsigned int)0);
// sign = spu_and( (vec_uint4)x, sign_mask);
// abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
exp_x = spu_rlmask(abs_x, -20);
exp_y = spu_rlmask(abs_y, -20);
// get shift count for denorm
cnt_x = spu_cntlz(abs_x);
cnt_y = spu_cntlz(abs_y);
cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
zero_x = spu_cmpgt(cnt_x, 63); // zero ?
zero_y = spu_cmpgt(cnt_y, 63); // zero ?
result0 = spu_or(zero_x, zero_y);
result0 = spu_shuffle(result0, result0, splat_hi);
// 0 - (cnt_x - 11) = 11 - cnt_x
shift_x= spu_add(cnt_x, -11);
shift_y= spu_add(cnt_y, -11);
cnt_x = spu_sub(11, cnt_x);
cnt_y = spu_sub(11, cnt_y);
// count to normalize
adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0));
adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0));
adj_x = spu_shuffle(adj_x, adj_x, splat_hi);
adj_y = spu_shuffle(adj_y, adj_y, splat_hi);
// for denorm
shiftx0 = spu_extract(shift_x, 0);
shiftx1 = spu_extract(shift_x, 2);
shifty0 = spu_extract(shift_y, 0);
shifty1 = spu_extract(shift_y, 2);
mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0);
mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0);
mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1);
mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1);
mant_x = spu_sel(mant_x0, mant_x1, merge_sel);
mant_y = spu_sel(mant_y0, mant_y1, merge_sel);
denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x);
denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y);
mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x);
mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y);
mant_x = spu_or(mant_x, implied_1); // hidden bit
mant_y = spu_or(mant_y, implied_1); // hidden bit
// x < y ?
resultx = _vec_gt64(abs_y, abs_x);
n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y);
mask = spu_cmpgt(n, 0);
mask = spu_andc(mask, resultx);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
// bring back to original range
mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0}));
mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1}));
// for adj_y < 0 exp max=1
shiftx0 = spu_extract(adj_y, 0);
shiftx1 = spu_extract(adj_y, 2);
mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0);
mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1);
mant_r = spu_sel(mant_x0, mant_x1, merge_sel);
// for adj_y >= 0
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(cnt, -11);
cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1
shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y));
shiftx0 = spu_extract(shift, 0);
shiftx1 = spu_extract(shift, 2);
mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0);
mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1);
mant_l = spu_sel(mant_x0, mant_x1, merge_sel);
cnt = spu_sub(adj_y, (vec_uint4)shift);
mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask));
result = spu_sel(mant_l, mant_r, denorm_y);
result = spu_sel(result, vec_zero, result0); // remainder 0
result = spu_sel(result, abs_x, resultx); // x < y
// result = spu_xor(result, sign); // set sign
// return ((vec_double2)result);
}
// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)));
/* if (2*x > y)
* x -= y
* if (2*x >= y) x -= y
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
// make x2
abs_2x = _twice(abs_x); // 2 x x
bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy
bias = spu_andc(bias, overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // minus
// make x2
abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2 x x (sign cleared; _twice does not handle negative values)
bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31));
bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
/* select final answer
*/
result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign
result = spu_sel(result, val_nan, nan_out); // if nan
return ((vec_double2)result);
}
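// Hedged scalar sketch of the mantissa reduction loop above; the _mant_mod_ref
// helper and its name are illustrative only and not part of the library. With
// both significands normalized and carrying their implied leading 1, and n the
// exponent difference, the remainder of the significands is formed by n
// shift-and-conditional-subtract steps, i.e. binary long division that keeps
// only the running remainder.
static inline unsigned long long _mant_mod_ref(unsigned long long mant_x, unsigned long long mant_y, int n)
{
    while (n-- > 0) {
        if (mant_x >= mant_y)
            mant_x -= mant_y;   // subtract when it does not go negative
        mant_x <<= 1;           // then move on to the next quotient bit
    }
    if (mant_x >= mant_y)       // final, unshifted subtraction
        mant_x -= mant_y;
    return mant_x;
}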
/*
* subtraction function under limited conditions
*/
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb)
{
// which is bigger input aa or bb
vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa
// need denorm calc ?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
/*
* extend spu_cmpgt function to 64bit data
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
/*
* double-format x2 (multiply by two)
*/
static inline vec_uint4 _twice(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// if denorm or zero << 1 , if norm exp + 1
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}

View File

@@ -1,107 +0,0 @@
/* remainderf4 - for each of four float slots, compute remainder of x/y defined as x - nearest_integer(x/y) * y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
vector float
remainderf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2, yabshalf;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, i, rem;
vec_uint4 inrange, odd0, odd1, odd2, cmp1, cmp2, odd;
// Find i = truncated_integer(|x/y|)
// By the error bounds of divf4, if |x/y| is < 2^20, the quotient is at most off by 1.0.
// Thus the exact truncation is either the truncated quotient, one less, or one greater.
q = divf4( x, y );
xabs = fabsf4( x );
yabs = fabsf4( y );
qabs = fabsf4( q );
xabs2 = spu_add( xabs, xabs );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
odd1 = spu_cmpeq( spu_and( qi1, 1 ), 1 );
odd0 = odd2 = spu_nor( odd1, odd1 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
cmp1 = spu_cmpgt( xabs, spu_nmsub( i1, yabs, xabs2 ) );
cmp2 = spu_cmpgt( xabs, spu_nmsub( i2, yabs, xabs2 ) );
i = i0;
i = spu_sel( i1, i, cmp1 );
i = spu_sel( i2, i, cmp2 );
odd = odd0;
odd = spu_sel( odd1, odd, cmp1 );
odd = spu_sel( odd2, odd, cmp2 );
rem = spu_nmsub( i, yabs, xabs );
// Test whether i or i+1 = nearest_integer(|x/y|)
//
// i+1 is correct if:
//
// rem > 0.5*|y|
// or
// rem = 0.5*|y| and i is odd
yabshalf = spu_mul( yabs, spu_splats(0.5f) );
cmp1 = spu_cmpgt( rem, yabshalf );
cmp2 = spu_and( spu_cmpeq( rem, yabshalf ), odd );
i = spu_sel( i, spu_add( i, spu_splats(1.0f) ), spu_or( cmp1, cmp2 ) );
i = copysignf4( i, q );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
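// Hedged scalar sketch of the candidate selection above; the helper name is
// illustrative only and not part of the library, and the sketch ignores the
// rounding-error compensation the vector code applies when choosing between
// i-1, i and i+1. It rounds the quotient to nearest, ties to even, then forms
// x - i*y (assumes <math.h>).
#include <math.h>
static inline float _remainderf_slot_ref(float x, float y)
{
    float ax = fabsf(x), ay = fabsf(y);
    float i = truncf(ax / ay);                       // truncated quotient candidate
    float rem = ax - i * ay;                         // residue for that candidate
    if (rem > 0.5f * ay ||
        (rem == 0.5f * ay && fmodf(i, 2.0f) == 1.0f))
        i += 1.0f;                                   // move to the nearest (even) quotient
    i = copysignf(i, x / y);                         // quotient takes the sign of x/y
    return x - i * y;
}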

View File

@@ -1,356 +0,0 @@
/* remquod2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/*
* This function returns the same vector double result as remainderd2().
* In addition a vector signed long long is stored in *quo,
* that contains the corresponding element values whose sign is
* the sign of xi / yi and whose magnitude is congruent modulo 2n to
* the magnitude of the integral quotient of xi / yi, where n is
* an implementation-defined integer greater than or equal to 3.
*/
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _twice(vec_uint4 aa);
vector double
remquod2(vector double x, vector double yy, vector signed long long *quo)
{
vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 quotient, quotient0;
vec_uint4 y_hi;
vec_uint4 abs_x, abs_yy, abs_2x, abs_8y, abs_4y, abs_2y;
vec_uint4 bias;
vec_uint4 nan_out, not_ge, quo_pos, overflow;
vec_uint4 result;
vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
vec_uint4 vec_zero = spu_splats((unsigned int)0);
vec_uint4 is_zeroy;
// cut sign
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_yy = spu_andc((vec_uint4)yy, sign_mask);
y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);
quo_pos = spu_cmpgt((vec_int4)spu_and((vec_uint4)spu_xor(x, yy), sign_mask), -1);
quo_pos = spu_shuffle(quo_pos, quo_pos, splat_hi);
// check nan out
is_zeroy = spu_cmpeq(abs_yy, vec_zero);
is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000
nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000
nan_out = spu_or(nan_out, is_zeroy); // y = 0
nan_out = spu_shuffle(nan_out, nan_out, splat_hi);
// make y x8
abs_2y = _twice(abs_yy); // 2 x y
abs_4y = _twice(abs_2y); // 4 x y
abs_8y = _twice(abs_4y); // 8 x y
/*
* use fmodd2 function
*/
// get remainder of y x8
// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_8y);
{
vec_double2 y = (vec_double2)abs_8y;
int shiftx0, shiftx1, shifty0, shifty1;
vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192});
// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 n, shift;
vec_uint4 exp_x, exp_y;
// , sign;
// vec_uint4 abs_x, abs_y;
vec_uint4 abs_y;
vec_uint4 mant_x, mant_x0, mant_x1;
vec_uint4 mant_y, mant_y0, mant_y1;
vec_uint4 mant_0, mant_1;
vec_uint4 mant_r, mant_l;
// vec_uint4 result;
vec_uint4 result0, resultx;
vec_uint4 zero_x, zero_y;
vec_uint4 denorm_x, denorm_y;
vec_uint4 cnt, cnt_x, cnt_y;
vec_uint4 shift_x, shift_y;
vec_uint4 adj_x, adj_y;
vec_uint4 z, borrow, mask;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1});
// vec_uint4 vec_zero = spu_splats((unsigned int)0);
// sign = spu_and( (vec_uint4)x, sign_mask);
// abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
exp_x = spu_rlmask(abs_x, -20);
exp_y = spu_rlmask(abs_y, -20);
// get shift count for denorm
cnt_x = spu_cntlz(abs_x);
cnt_y = spu_cntlz(abs_y);
cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
zero_x = spu_cmpgt(cnt_x, 63); // zero ?
zero_y = spu_cmpgt(cnt_y, 63); // zero ?
result0 = spu_or(zero_x, zero_y);
result0 = spu_shuffle(result0, result0, splat_hi);
// 0 - (cnt_x - 11) = 11 - cnt_x
shift_x= spu_add(cnt_x, -11);
shift_y= spu_add(cnt_y, -11);
cnt_x = spu_sub(11, cnt_x);
cnt_y = spu_sub(11, cnt_y);
// count to normalize
adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0));
adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0));
adj_x = spu_shuffle(adj_x, adj_x, splat_hi);
adj_y = spu_shuffle(adj_y, adj_y, splat_hi);
// for denorm
shiftx0 = spu_extract(shift_x, 0);
shiftx1 = spu_extract(shift_x, 2);
shifty0 = spu_extract(shift_y, 0);
shifty1 = spu_extract(shift_y, 2);
mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0);
mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0);
mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1);
mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1);
mant_x = spu_sel(mant_x0, mant_x1, merge_sel);
mant_y = spu_sel(mant_y0, mant_y1, merge_sel);
denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x);
denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y);
mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x);
mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y);
mant_x = spu_or(mant_x, implied_1); // hidden bit
mant_y = spu_or(mant_y, implied_1); // hidden bit
// x < y ?
resultx = _vec_gt64(abs_y, abs_x);
n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y);
mask = spu_cmpgt(n, 0);
mask = spu_andc(mask, resultx);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
// bring back to original range
mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0}));
mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1}));
// for adj_y < 0 exp max=1
shiftx0 = spu_extract(adj_y, 0);
shiftx1 = spu_extract(adj_y, 2);
mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0);
mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1);
mant_r = spu_sel(mant_x0, mant_x1, merge_sel);
// for adj_y >= 0
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(cnt, -11);
cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1
shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y));
shiftx0 = spu_extract(shift, 0);
shiftx1 = spu_extract(shift, 2);
mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0);
mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1);
mant_l = spu_sel(mant_x0, mant_x1, merge_sel);
cnt = spu_sub(adj_y, (vec_uint4)shift);
mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask));
result = spu_sel(mant_l, mant_r, denorm_y);
result = spu_sel(result, vec_zero, result0); // remainder 0
result = spu_sel(result, abs_x, resultx); // x < y
// result = spu_xor(result, sign); // set sign
// return ((vec_double2)result);
}
// if 8*y overflows (a y exponent of 0x7FC-0x7FF reaches 0x7FF after the +3 from x8)
// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
/* if (x >= 4*y)
* x -= 4*y
* quotient = 4
* else
* quotient = 0
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FCFFFFF));
not_ge = _vec_gt64(abs_4y, abs_x);
not_ge = spu_or(not_ge, overflow);
abs_x = spu_sel(_sub_d_(abs_x, abs_4y), abs_x, not_ge);
quotient = spu_andc(spu_splats((int)4), (vec_int4)not_ge);
/* if (x >= 2*y
* x -= 2*y
* quotient += 2
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FDFFFFF));
not_ge = _vec_gt64(abs_2y, abs_x); // abs_2y > abs_x
not_ge = spu_or(not_ge, overflow);
abs_x = spu_sel(_sub_d_(abs_x, abs_2y), abs_x, not_ge);
quotient = spu_sel(spu_add(quotient, 2), quotient, not_ge);
/* if (2*x > y)
* x -= y
* if (2*x >= y) x -= y
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
// make x2
abs_2x = _twice(abs_x); // 2 x x
bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy
bias = spu_andc(bias, overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
quotient = spu_sub(quotient, (vec_int4)bias);
overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // minus
// make x2
abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2 x x (sign cleared; _twice does not handle negative values)
bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31));
bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
quotient = spu_sub(quotient, (vec_int4)bias);
/* select final answer
*/
result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign
result = spu_sel(result, val_nan, nan_out); // if nan
quotient = spu_and(quotient, ((vec_int4){0,7,0,7})); // limit to 3bit
quotient0 = spu_subx( (vec_int4)vec_zero, quotient, spu_rlqwbyte(spu_genb((vec_int4)vec_zero,quotient),4));
quotient = spu_sel(quotient0, quotient, quo_pos);
*quo = (vec_llong2)quotient;
return ((vec_double2)result);
}
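/* Hedged scalar model of what one slot of remquod2 computes, using the C99
 * remquo() from <math.h>, which likewise returns the IEEE remainder and stores
 * a sign-correct, low-bits-only quotient. The _remquod_slot_ref helper is
 * illustrative only and not part of the library.
 */
#include <math.h>
static inline double _remquod_slot_ref(double x, double y, int *quo)
{
    return remquo(x, y, quo);
}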
/*
* subtraction function under limited conditions
*/
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb)
{
// which is bigger input aa or bb
vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa
// need denorm calc ?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
/*
* extend spu_cmpgt function to 64bit data
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
/*
* double-format x2 (multiply by two)
*/
static inline vec_uint4 _twice(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// if denorm or zero << 1 , if norm exp + 1
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}

View File

@@ -1,96 +0,0 @@
/* rsqrtd2 - for each of two double slots, compute reciprocal square root.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Handles exceptional values as follows:
// NaN -> NaN
// (+,-)0 -> (+,-)Inf
// +Inf -> +0
// -Inf -> NaN
// -Finite -> NaN
// Denormal inputs are treated as zero.
vector double rsqrtd2 (vector double x)
{
vec_ullong2 expmask, onemask, signmask, evenexp;
vec_double2 half, one, man, exp, nexp, y1, y2, y3, zero, inf, nan, result;
vec_float4 halff, onef, manf, y0f, y1f;
expmask = spu_splats(0x7ff0000000000000ull);
onemask = spu_splats(0x0010000000000000ull);
signmask = spu_splats(0x8000000000000000ull);
onef = spu_splats(1.0f);
one = spu_extend( onef );
halff = spu_splats(0.5f);
half = spu_extend( halff );
// Factor input ( mantissa x 2^exponent ) into ( mantissa x 2^(-i) ) and ( 2^(exponent+i) )
// where i = 0 when exponent is even and i = 1 when exponent is odd.
//
// Compute reciprocal-square-root of second factor by finding -(exponent+i)/2:
//
// biased_exp = 1023 + exponent
// new_biased_exp = 1023 - (exponent+i)/2
// = 1023 - (biased_exp-1023+i)/2
// = (3069 - (biased_exp+i)) / 2
evenexp = spu_and( (vec_ullong2)x, onemask );
man = spu_sel( x, (vec_double2)spu_add( spu_splats(0x3fe00000u), (vec_uint4)evenexp ), expmask );
exp = spu_and( x, (vec_double2)expmask );
nexp = spu_or( exp, (vec_double2)onemask );
nexp = (vec_double2)spu_rlmask( spu_sub( (vec_uint4)spu_splats(0xbfd0000000000000ull), (vec_uint4)nexp ), -1 );
// Compute mantissa part in single precision.
// Convert back to double and multiply with 2^(-(exponent+i)/2), then
// do two Newton-Raphson steps for full precision.
manf = spu_roundtf( man );
y0f = spu_rsqrte( manf );
y1f = spu_madd( spu_mul( y0f, halff ), spu_nmsub( y0f, spu_mul( y0f, manf ), onef ), y0f );
y1 = spu_mul( spu_extend( y1f ), nexp );
y2 = spu_madd( spu_mul( y1, half ), spu_nmsub( y1, spu_mul( y1, x ), one ), y1 );
y3 = spu_madd( spu_mul( y2, half ), spu_nmsub( y2, spu_mul( y2, x ), one ), y2 );
// Choose iterated result or special value.
zero = spu_and( x, (vec_double2)signmask );
inf = spu_sel( (vec_double2)expmask, x, signmask );
nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
result = spu_sel( y3, zero, isinfd2 ( x ) );
result = spu_sel( result, nan, signbitd2 ( x ) );
result = spu_sel( result, inf, is0denormd2 ( x ) );
return result;
}
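// Hedged scalar form of the Newton-Raphson step the code above applies twice
// in double precision: y_{n+1} = y_n + 0.5*y_n*(1 - x*y_n*y_n), which
// converges quadratically to 1/sqrt(x) from the single-precision estimate.
// The _rsqrt_nr_step_ref helper is illustrative only and not part of the library.
static inline double _rsqrt_nr_step_ref(double x, double y)
{
    return y + 0.5 * y * (1.0 - x * y * y);
}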

View File

@@ -0,0 +1,116 @@
/* Common functions for lldivi2/lldivu2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLDIV_H___
#define ___SIMD_MATH_LLDIV_H___
#include <spu_intrinsics.h>
static inline vector unsigned long long
__ll_spu_cntlz(vector unsigned long long x)
{
vec_uint4 cnt;
cnt = spu_cntlz((vec_uint4)x);
cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4)));
cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11}));
return (vec_ullong2)cnt;
}
static inline vector unsigned long long
__ll_spu_sl(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
// shift upper word
x_upper = spu_and(x, mask);
x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1));
x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3));
x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
__ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
vec_uint4 cnt_byte;
cnt_byte = spu_add((vec_uint4)count, 7);
// shift upper word
x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1));
x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_andc(x, mask);
x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3));
x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
__ll_spu_cmpeq_zero(vector unsigned long long x)
{
vec_uint4 cmp;
cmp = spu_cmpeq((vec_uint4)x, 0);
return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
static inline vector unsigned long long
__ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 gt;
gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y);
gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y));
return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vector unsigned long long
__ll_spu_sub(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 borrow;
borrow = spu_genb((vec_uint4)x, (vec_uint4)y);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow);
}
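/* Hedged scalar model of the 64-bit count-leading-zeros composed from 32-bit
 * counts, as __ll_spu_cntlz builds it: when the upper word is all zero (its
 * count saturates at 32), the lower word's count is added on top. The *_ref
 * helpers below are illustrative only and not part of the library.
 */
#include <stdint.h>
static inline unsigned int __ll_clz32_ref(uint32_t x)
{
    unsigned int n = 0;
    if (x == 0) return 32;
    while (!(x & 0x80000000u)) { x <<= 1; ++n; }
    return n;
}
static inline unsigned int __ll_clz64_ref(uint64_t x)
{
    uint32_t hi = (uint32_t)(x >> 32);
    return hi ? __ll_clz32_ref(hi) : 32 + __ll_clz32_ref((uint32_t)x);
}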
#endif // ___SIMD_MATH_LLDIV_H___

View File

@@ -0,0 +1,84 @@
/* Internal helper routines for the vector double remainder family.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH__REMAINDER_H___
#define ___SIMD_MATH__REMAINDER_H___
#include <simdmath/_vec_utils.h>
/*
* double-format x2 (multiply by two)
*/
static inline vec_uint4
__rem_twice_d(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// if denorm or zero << 1 , if norm exp + 1
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}
/*
* subtraction function under limited conditions
*/
static inline vec_uint4
__rem_sub_d(vec_uint4 aa, vec_uint4 bb)
{
// which is bigger input aa or bb
vec_uint4 is_bigb = __vec_gt64(bb, aa); // bb > aa
// need denorm calc ?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
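/* Hedged scalar model of __rem_twice_d on the raw bit pattern of a
 * non-negative, finite double (sign already cleared, as in the callers).
 * Normal numbers are doubled by adding 1 to the biased exponent; denormals and
 * zero are doubled by shifting the whole pattern left, which lets the leading
 * mantissa bit walk into the exponent field across the denormal boundary.
 * The __rem_twice_bits_ref helper is illustrative only and not part of the library.
 */
#include <stdint.h>
static inline uint64_t __rem_twice_bits_ref(uint64_t bits)
{
    const uint64_t mant_mask = 0x000FFFFFFFFFFFFFULL;
    return (bits > mant_mask) ? bits + 0x0010000000000000ULL : bits << 1;
}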
#endif

View File

@@ -0,0 +1,57 @@
/* Common types for SPU SIMD Math Library
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH__VEC_UTILS_H___
#define ___SIMD_MATH__VEC_UTILS_H___
/*
* extend spu_cmpgt function to 64bit data
*/
static inline vec_uint4
__vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right
}
static inline vec_uint4
__vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = __vec_gt64_half(aa, bb); // only higher is right
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vec_uint4
__vec_eq64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 eq = spu_cmpeq(aa, bb);
return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
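/* Hedged scalar form of the comparison __vec_gt64_half emulates with 32-bit
 * vector compares: the high words decide unless they are equal, in which case
 * the low words decide. The __gt64_ref helper is illustrative only and not
 * part of the library.
 */
#include <stdint.h>
static inline int __gt64_ref(uint32_t a_hi, uint32_t a_lo, uint32_t b_hi, uint32_t b_lo)
{
    return (a_hi > b_hi) || (a_hi == b_hi && a_lo > b_lo);
}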
#endif

View File

@@ -27,14 +27,18 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ABSI4_H___
#define ___SIMD_MATH_ABSI4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int
absi4 (vector signed int x)
static inline vector signed int
_absi4 (vector signed int x)
{
vec_int4 neg;
neg = spu_sub( 0, x );
return spu_sel( neg, x, spu_cmpgt( x, -1 ) );
vec_int4 neg;
neg = spu_sub( 0, x );
return spu_sel( neg, x, spu_cmpgt( x, -1 ) );
}
#endif

View File

@@ -27,52 +27,56 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ACOSF4_H___
#define ___SIMD_MATH_ACOSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sqrtf4.h>
//
// Computes the inverse cosine of all four slots of x
//
vector float
acosf4 (vector float x)
static inline vector float
_acosf4 (vector float x)
{
vec_float4 result, xabs;
vec_float4 t1;
vec_float4 xabs2, xabs4;
vec_float4 hi, lo;
vec_float4 neg, pos;
vec_uint4 select;
vec_float4 result, xabs;
vec_float4 t1;
vec_float4 xabs2, xabs4;
vec_float4 hi, lo;
vec_float4 neg, pos;
vec_uint4 select;
xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1));
select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31));
xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1));
select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31));
t1 = sqrtf4(spu_sub( ((vec_float4){1.0, 1.0, 1.0, 1.0}) , xabs));
t1 = _sqrtf4(spu_sub( spu_splats(1.0f), xabs));
/* Instruction counts can be reduced if the polynomial was
* computed entirely from nested (dependent) fma's. However,
* to reduce the number of pipeline stalls, the polynomial is evaluated
* in two halves (hi and lo).
*/
xabs2 = spu_mul(xabs, xabs);
xabs4 = spu_mul(xabs2, xabs2);
hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f));
hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f));
hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f));
lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f));
lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f));
lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f));
/* Instruction counts can be reduced if the polynomial was
* computed entirely from nested (dependent) fma's. However,
* to reduce the number of pipeline stalls, the polynomial is evaluated
* in two halves (hi and lo).
*/
xabs2 = spu_mul(xabs, xabs);
xabs4 = spu_mul(xabs2, xabs2);
hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f));
hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f));
hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f));
lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f));
lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f));
lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f));
result = spu_madd(hi, xabs4, lo);
result = spu_madd(hi, xabs4, lo);
/* Adjust the result if x is negative.
*/
neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f));
pos = spu_mul(t1, result);
/* Adjust the result if x is negative.
*/
neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f));
pos = spu_mul(t1, result);
result = spu_sel(pos, neg, select);
result = spu_sel(pos, neg, select);
return result;
return result;
}
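// Hedged scalar form of the split polynomial evaluation above, with the
// coefficients copied from the vector code; the hi and lo halves carry no data
// dependence on each other and are recombined through |x|^4. _acosf4 then
// scales by sqrt(1 - |x|) and reflects about pi for negative inputs. The
// _acos_poly_ref helper is illustrative only and not part of the library.
static inline float _acos_poly_ref(float xabs)
{
    float x2 = xabs * xabs;
    float x4 = x2 * x2;
    float hi = ((-0.0012624911f * xabs + 0.0066700901f) * xabs - 0.0170881256f) * xabs + 0.0308918810f;
    float lo = ((-0.0501743046f * xabs + 0.0889789874f) * xabs - 0.2145988016f) * xabs + 1.5707963050f;
    return hi * x4 + lo;
}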
#endif

View File

@@ -27,59 +27,66 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ASINF4_H___
#define ___SIMD_MATH_ASINF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
asinf4 (vector float x)
#include <simdmath/sqrtf4.h>
#include <simdmath/divf4.h>
static inline vector float
_asinf4 (vector float x)
{
// positive = (x > 0)
//
vec_uchar16 positive = (vec_uchar16)spu_cmpgt(x,spu_splats(0.0f));
// positive = (x > 0)
//
vec_uint4 positive = spu_cmpgt(x,spu_splats(0.0f));
// gtHalf = (|x| > 0.5)
//
vec_uchar16 gtHalf = (vec_uchar16)spu_cmpabsgt(x,spu_splats(0.5f));
// gtHalf = (|x| > 0.5)
//
vec_uint4 gtHalf = spu_cmpabsgt(x,spu_splats(0.5f));
// x = absf(x)
//
x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff));
// x = absf(x)
//
x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff));
// if (x > 0.5)
// g = 0.5 - 0.5*x
// x = -2 * sqrtf(g)
// else
// g = x * x
//
vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf);
// if (x > 0.5)
// g = 0.5 - 0.5*x
// x = -2 * sqrtf(g)
// else
// g = x * x
//
vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf);
x = spu_sel(x,spu_mul(spu_splats(-2.0f),sqrtf4(g)),gtHalf);
x = spu_sel(x,spu_mul(spu_splats(-2.0f),_sqrtf4(g)),gtHalf);
// Compute the polynomials and take their ratio
// denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
// num = x * g * (-0.504400557f * g + 0.933933258f)
//
vec_float4 denom = spu_add(g,spu_splats(-5.54846723f));
vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f));
denom = spu_madd(denom,g,spu_splats(5.603603363f));
num = spu_mul(spu_mul(x,g),num);
// Compute the polynomials and take their ratio
// denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
// num = x * g * (-0.504400557f * g + 0.933933258f)
//
vec_float4 denom = spu_add(g,spu_splats(-5.54846723f));
vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f));
denom = spu_madd(denom,g,spu_splats(5.603603363f));
num = spu_mul(spu_mul(x,g),num);
// x = x + num / denom
//
x = spu_add(x,divf4(num,denom));
// x = x + num / denom
//
x = spu_add(x,_divf4(num,denom));
// if (x > 0.5)
// x = x + M_PI_2
//
x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf);
// if (x > 0.5)
// x = x + M_PI_2
//
x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf);
// if (!positive) x = -x
//
x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive);
// if (!positive) x = -x
//
x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive);
return x;
return x;
}
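// Hedged scalar form of the core rational step above for the |x| <= 0.5 path,
// with the constants copied from the vector code; the |x| > 0.5 path first
// substitutes x = -2*sqrt(0.5 - 0.5*x) and adds pi/2 afterwards. The
// _asin_core_ref helper is illustrative only and not part of the library.
static inline float _asin_core_ref(float x)
{
    float g = x * x;
    float num = x * g * (-0.504400557f * g + 0.933933258f);
    float den = (g - 5.54846723f) * g + 5.603603363f;
    return x + num / den;
}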
#endif

View File

@@ -27,34 +27,40 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ATAN2F4_H___
#define ___SIMD_MATH_ATAN2F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/atanf4.h>
#include <simdmath/divf4.h>
//
// Inverse tangent function of two variables
//
vector float
atan2f4 (vector float y, vector float x)
static inline vector float
_atan2f4 (vector float y, vector float x)
{
vec_float4 res = atanf4(divf4(y,x));
vec_float4 res = _atanf4(_divf4(y,x));
// Use the arguments to determine the quadrant of the result:
// if (x < 0)
// if (y < 0)
// res = -PI + res
// else
// res = PI + res
//
vec_uchar16 yNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),y);
vec_uchar16 xNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),x);
// Use the arguments to determine the quadrant of the result:
// if (x < 0)
// if (y < 0)
// res = -PI + res
// else
// res = PI + res
//
vec_uint4 yNeg = spu_cmpgt(spu_splats(0.0f),y);
vec_uint4 xNeg = spu_cmpgt(spu_splats(0.0f),x);
vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg);
vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg);
vec_float4 newRes = spu_add(bias, res);
vec_float4 newRes = spu_add(bias, res);
res = spu_sel(res,newRes,xNeg);
res = spu_sel(res,newRes,xNeg);
return res;
return res;
}
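// Hedged scalar model of the quadrant fix-up above, assuming x != 0:
// atan(y/x) lands in (-pi/2, pi/2) and is shifted by +/- pi when x is
// negative, with the sign taken from y. The _atan2_ref helper is illustrative
// only and not part of the library (uses <math.h>).
#include <math.h>
static inline float _atan2_ref(float y, float x)
{
    float res = atanf(y / x);
    if (x < 0.0f)
        res += (y < 0.0f) ? -3.14159265358979323846f : 3.14159265358979323846f;
    return res;
}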
#endif

View File

@@ -27,50 +27,55 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ATANF4_H___
#define ___SIMD_MATH_ATANF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/recipf4.h>
//
// Computes the inverse tangent of all four slots of x.
//
vector float
atanf4 (vector float x)
static inline vector float
_atanf4 (vector float x)
{
vec_float4 bias;
vec_float4 x2, x3, x4, x8, x9;
vec_float4 hi, lo;
vec_float4 result;
vec_float4 inv_x;
vec_uint4 sign;
vec_uint4 select;
vec_float4 bias;
vec_float4 x2, x3, x4, x8, x9;
vec_float4 hi, lo;
vec_float4 result;
vec_float4 inv_x;
vec_uint4 sign;
vec_uint4 select;
sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31);
inv_x = recipf4(x);
inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u));
sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31);
inv_x = _recipf4(x);
inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u));
select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f));
bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f)));
bias = (vec_float4)spu_and((vec_uint4)bias, select);
select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f));
bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f)));
bias = (vec_float4)spu_and((vec_uint4)bias, select);
x = spu_sel(x, inv_x, select);
x = spu_sel(x, inv_x, select);
bias = spu_add(bias, x);
x2 = spu_mul(x, x);
x3 = spu_mul(x2, x);
x4 = spu_mul(x2, x2);
x8 = spu_mul(x4, x4);
x9 = spu_mul(x8, x);
hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
lo = spu_madd(lo, x3, bias);
bias = spu_add(bias, x);
x2 = spu_mul(x, x);
x3 = spu_mul(x2, x);
x4 = spu_mul(x2, x2);
x8 = spu_mul(x4, x4);
x9 = spu_mul(x8, x);
hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
lo = spu_madd(lo, x3, bias);
result = spu_madd(hi, x9, lo);
result = spu_madd(hi, x9, lo);
return result;
return result;
}
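// Hedged scalar form of the range reduction above, using the identity
// atan(x) = sign(x)*pi/2 + atan(-1/x) for |x| > 1 so the polynomial only ever
// sees arguments in [-1, 1]. The _atan_reduce_ref helper is illustrative only
// and not part of the library (uses <math.h>).
#include <math.h>
static inline float _atan_reduce_ref(float x)
{
    if (fabsf(x) > 1.0f)
        return copysignf(1.57079632679489661923f, x) + atanf(-1.0f / x);
    return atanf(x);
}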
#endif

View File

@@ -27,79 +27,69 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CBRTF4_H___
#define ___SIMD_MATH_CBRTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/frexpf4.h>
#include <simdmath/ldexpf4.h>
#include <simdmath/divf4.h>
#define __calcQuot(xexp) n = xexp; \
vec_uchar16 negxexpmask = (vec_uchar16)spu_cmpgt(spu_splats(0), n); \
n = spu_sel(n, spu_add(n,2), negxexpmask); \
\
quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4)); \
quot = spu_add(quot, spu_rlmaska(quot, -4)); \
quot = spu_add(quot, spu_rlmaska(quot, -8)); \
quot = spu_add(quot, spu_rlmaska(quot,-16)); \
vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1)); \
quot = spu_add( \
quot, \
spu_rlmaska( \
spu_add( \
spu_add(r,5), \
spu_sl (r,2) \
), \
-4 \
) \
); \
static inline vec_int4
__cbrtf4_calc_quot(vec_int4 n)
{
vec_int4 quot;
vec_uint4 negxexpmask = spu_cmpgt(spu_splats(0), n);
n = spu_sel(n, spu_add(n,2), negxexpmask);
#define _CBRTF_H_cbrt2 1.2599210498948731648 // 2^(1/3)
#define _CBRTF_H_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4));
quot = spu_add(quot, spu_rlmaska(quot, -4));
quot = spu_add(quot, spu_rlmaska(quot, -8));
quot = spu_add(quot, spu_rlmaska(quot,-16));
vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1));
quot = spu_add(quot, spu_rlmaska(spu_add(spu_add(r,5), spu_sl (r,2)), -4));
return quot;
}
vector float
cbrtf4 (vector float x)
#define __CBRTF_cbrt2 1.2599210498948731648 // 2^(1/3)
#define __CBRTF_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
static inline vector float
_cbrtf4 (vector float x)
{
vec_float4 zeros = spu_splats(0.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, zeros);
vec_int4 xexp, n;
vec_uint4 zeromask = spu_cmpeq(x, zeros);
vec_int4 xexp;
vec_float4 sgnmask = (vec_float4)spu_splats(0x7FFFFFFF);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_uint4 negmask = spu_cmpgt(spu_splats(0.0f), x);
x = spu_and(x, sgnmask);
x = frexpf4(x, &xexp);
x = _frexpf4(x, &xexp);
vec_float4 p = spu_madd(
spu_madd(x, spu_splats(-0.191502161678719066f), spu_splats(0.697570460207922770f)),
x,
spu_splats(0.492659620528969547f)
);
vec_float4 p3 = spu_mul(p, spu_mul(p, p));
vec_int4 quot;
__calcQuot(xexp);
vec_int4 quot = __cbrtf4_calc_quot(xexp);
vec_int4 modval = spu_sub(spu_sub(xexp,quot), spu_sl(quot,1)); // mod = xexp - 3*quotient
vec_float4 factor = spu_splats((float)(1.0/_CBRTF_H_sqr_cbrt2));
factor = spu_sel(factor, spu_splats((float)(1.0/_CBRTF_H_cbrt2)), spu_cmpeq(modval,-1));
vec_float4 factor = spu_splats((float)(1.0/__CBRTF_sqr_cbrt2));
factor = spu_sel(factor, spu_splats((float)(1.0/__CBRTF_cbrt2)), spu_cmpeq(modval,-1));
factor = spu_sel(factor, spu_splats((float)( 1.0)), spu_cmpeq(modval, 0));
factor = spu_sel(factor, spu_splats((float)( _CBRTF_H_cbrt2)), spu_cmpeq(modval, 1));
factor = spu_sel(factor, spu_splats((float)(_CBRTF_H_sqr_cbrt2)), spu_cmpeq(modval, 2));
factor = spu_sel(factor, spu_splats((float)( __CBRTF_cbrt2)), spu_cmpeq(modval, 1));
factor = spu_sel(factor, spu_splats((float)(__CBRTF_sqr_cbrt2)), spu_cmpeq(modval, 2));
vec_float4 pre = spu_mul(p, factor);
vec_float4 numr = spu_madd(x , spu_splats(2.0f), p3);
vec_float4 denr = spu_madd(p3, spu_splats(2.0f), x );
vec_float4 res = spu_mul(pre, divf4(numr, denr));
res = ldexpf4(res, quot);
vec_float4 res = spu_mul(pre, _divf4(numr, denr));
res = _ldexpf4(res, quot);
return spu_sel(spu_sel(res, spu_orc(res,sgnmask), negmask),
zeros,
zeromask);
}
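// Hedged scalar model of the exponent split used above. With x = m * 2^e
// (m from frexpf), any integer quot with mod = e - 3*quot satisfies
// cbrt(x) = cbrt(m) * 2^(mod/3) * 2^quot; the vector code picks quot near e/3
// so that mod stays in {-2,...,2} and the 2^(mod/3) factor comes from a small
// constant table. Assumes x > 0. The _cbrt_split_ref helper is illustrative
// only and not part of the library (uses <math.h>).
#include <math.h>
static inline float _cbrt_split_ref(float x)
{
    int e, quot, mod;
    float m = frexpf(x, &e);
    quot = e / 3;
    mod = e - 3 * quot;
    return ldexpf(cbrtf(m) * powf(2.0f, (float)mod / 3.0f), quot);
}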
/*
_FUNC_DEF(vec_float4, cbrtf4, (vec_float4 x))
{
vec_uchar16 neg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 res = exp2f4(spu_mul(spu_splats((float)0.3333333333333f), log2f4(absx)));
res = spu_sel(res, spu_or(sbit, res), neg);
return res;
}
*/
#endif

View File

@@ -27,11 +27,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CEILD2_H___
#define ___SIMD_MATH_CEILD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
ceild2(vector double in)
static inline vector double
_ceild2(vector double in)
{
vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
@@ -83,7 +86,7 @@ ceild2(vector double in)
insert =spu_andc(spu_andc(e_sign, e_00), exp_ge0);
/* replace insert
*/
*/
in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));
/* in + addend
@@ -92,3 +95,5 @@ ceild2(vector double in)
return (out);
}
#endif

View File

@@ -27,28 +27,32 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CEILF4_H___
#define ___SIMD_MATH_CEILF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
ceilf4 (vector float x)
static inline vector float
_ceilf4 (vector float x)
{
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
// Find truncated value and one greater.
// Find truncated value and one greater.
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, 1 );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, 1 );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
// If truncated value is less than input, add one.
// If truncated value is less than input, add one.
return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) );
return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) );
}
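// Hedged scalar model of the strategy above. The 0x4b000000 compare
// corresponds to |x| < 2^23, the range in which the int round-trip truncates
// exactly; outside it every float is already integral and x is returned
// unchanged. The _ceil_ref helper is illustrative only and not part of the library.
static inline float _ceil_ref(float x)
{
    if (!(x < 8388608.0f && x > -8388608.0f))
        return x;                        // |x| >= 2^23 (or NaN): already integral
    float truncated = (float)(int)x;     // truncate toward zero
    return (x > truncated) ? truncated + 1.0f : truncated;
}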
#endif

View File

@@ -27,13 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COPYSIGND2_H___
#define ___SIMD_MATH_COPYSIGND2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double copysignd2 (vector double x, vector double y)
static inline vector double
_copysignd2 (vector double x, vector double y)
{
return spu_sel( x, y, spu_splats(0x8000000000000000ull) );
return spu_sel( x, y, spu_splats(0x8000000000000000ull) );
}
#endif

View File

@@ -27,13 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COPYSIGNF4_H___
#define ___SIMD_MATH_COPYSIGNF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
copysignf4 (vector float x, vector float y)
static inline vector float
_copysignf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_splats(0x80000000) );
return spu_sel( x, y, spu_splats(0x80000000) );
}
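// Hedged scalar form of the single bit-select above: keep everything from x
// except the sign bit, which is taken from y. The _copysignf_ref helper is
// illustrative only and not part of the library.
#include <stdint.h>
#include <string.h>
static inline float _copysignf_ref(float x, float y)
{
    uint32_t xb, yb;
    memcpy(&xb, &x, sizeof xb);
    memcpy(&yb, &y, sizeof yb);
    xb = (xb & 0x7fffffffu) | (yb & 0x80000000u);
    memcpy(&x, &xb, sizeof x);
    return x;
}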
#endif

View File

@@ -0,0 +1,46 @@
/* cosd2 - Computes the cosine of the each of two double slots.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COSD2_H___
#define ___SIMD_MATH_COSD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sincosd2.h>
static inline vector double
_cosd2 (vector double x)
{
vec_double2 s, c;
_sincosd2(x, &s, &c);
return c;
}
#endif

View File

@@ -0,0 +1,46 @@
/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COSF4_H___
#define ___SIMD_MATH_COSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sincosf4.h>
static inline vector float
_cosf4 (vector float x)
{
vec_float4 s, c;
_sincosf4(x, &s, &c);
return c;
}
#endif

View File

@@ -27,15 +27,21 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVD2_H___
#define ___SIMD_MATH_DIVD2_H___
// Equal to numer * recipd2(denom)
// See recipd2 for results of special values.
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
divd2 (vector double numer, vector double denom)
#include <simdmath/recipd2.h>
static inline vector double
_divd2 (vector double numer, vector double denom)
{
return spu_mul( numer, recipd2( denom ) );
return spu_mul( numer, _recipd2( denom ) );
}
#endif

View File

@@ -27,20 +27,24 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVF4_H___
#define ___SIMD_MATH_DIVF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
divf4 (vector float numer, vector float denom)
static inline vector float
_divf4 (vector float numer, vector float denom)
{
// Reciprocal estimate and 1 Newton-Raphson iteration.
// Uses constant of 1.0 + 1 ulp to improve accuracy.
// Reciprocal estimate and 1 Newton-Raphson iteration.
// Uses constant of 1.0 + 1 ulp to improve accuracy.
vector float y0, y0numer;
vector float oneish = (vector float)spu_splats(0x3f800001);
vector float y0, y0numer;
vector float oneish = (vector float)spu_splats(0x3f800001);
y0 = spu_re( denom );
y0numer = spu_mul( numer, y0 );
return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer );
y0 = spu_re( denom );
y0numer = spu_mul( numer, y0 );
return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer );
}
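// Hedged scalar form of the refinement above. With y0 close to 1/denom from
// the estimate instruction, one Newton-Raphson step folded around the
// numerator gives q = numer*y0 + (1 - denom*y0)*(numer*y0); the 1.0 + 1 ulp
// "oneish" constant nudges the final rounding. The _div_nr_ref helper is
// illustrative only and not part of the library.
static inline float _div_nr_ref(float numer, float denom, float y0)
{
    float y0numer = numer * y0;
    return (1.0f - denom * y0) * y0numer + y0numer;
}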
#endif

View File

@@ -0,0 +1,67 @@
/* divi4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVI4_H___
#define ___SIMD_MATH_DIVI4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divu4.h>
// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
static inline divi4_t
_divi4 (vector signed int numer, vector signed int denom)
{
divu4_t resAbs;
divi4_t res;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 numerAbs, denomAbs;
// Determine whether result needs sign change
numerPos = spu_cmpgt( numer, -1 );
denomPos = spu_cmpgt( denom, -1 );
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos );
denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos );
resAbs = _divu4(numerAbs, denomAbs);
res.quot = spu_sel( (vec_int4)resAbs.quot, spu_sub( 0, (vec_int4)resAbs.quot ), quotNeg );
res.rem = spu_sel( spu_sub( 0, (vec_int4)resAbs.rem ), (vec_int4)resAbs.rem, numerPos );
return res;
}
#endif
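_divi4 reuses the unsigned routine: it divides absolute values, negates the quotient when exactly one operand is negative, and gives the remainder the sign of the numerator. A minimal scalar sketch of that sign fixup in plain C, with divide-by-zero handled as the comment above describes:

#include <stdio.h>

typedef struct { int quot, rem; } divi_result;

/* Signed divide built on an unsigned divide, as in _divi4:
   quotient is negated when the operand signs differ,
   remainder takes the sign of the numerator. */
static divi_result div_signed(int numer, int denom)
{
    unsigned un = numer < 0 ? 0u - (unsigned)numer : (unsigned)numer;
    unsigned ud = denom < 0 ? 0u - (unsigned)denom : (unsigned)denom;
    unsigned q = ud ? un / ud : 0u;     /* divide by zero -> quot = 0    */
    unsigned r = ud ? un % ud : un;     /* divide by zero -> rem = numer */
    divi_result res;
    res.quot = ((numer < 0) != (denom < 0)) ? -(int)q : (int)q;
    res.rem  = (numer < 0) ? -(int)r : (int)r;
    return res;
}

int main(void)
{
    divi_result r = div_signed(-7, 2);
    printf("quot=%d rem=%d\n", r.quot, r.rem);   /* quot=-3 rem=-1 */
    return 0;
}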

View File

@@ -27,44 +27,48 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVU4_H___
#define ___SIMD_MATH_DIVU4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
// divu4 - for each of four unsigned integer slots, compute quotient and remainder of numer/denom
// and store in divu4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
divu4_t divu4 (vector unsigned int numer, vector unsigned int denom)
static inline divu4_t
_divu4 (vector unsigned int numer, vector unsigned int denom)
{
divu4_t res;
vec_int4 shift;
vec_uint4 quot, newQuot;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
divu4_t res;
vec_int4 shift;
vec_uint4 quot, newQuot;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
denomZeros = spu_cntlz( denom );
numerZeros = spu_cntlz( numer );
denomZeros = spu_cntlz( denom );
numerZeros = spu_cntlz( numer );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
// Shift denom to align leading one with numerator's
denomShifted = spu_sl( denom, (vec_uint4)shift );
oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) );
denomShifted = spu_sl( denom, (vec_uint4)shift );
oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) );
// Shift left all leading zeros.
// Shift left all leading zeros.
denomLeft = spu_sl( denom, denomZeros );
oneLeft = spu_sl( spu_splats(1U), denomZeros );
denomLeft = spu_sl( denom, denomZeros );
oneLeft = spu_sl( spu_splats(1U), denomZeros );
quot = spu_splats(0U);
quot = spu_splats(0U);
do
{
do
{
cont = spu_cmpgt( oneShifted, 0U );
anyCont = spu_extract( spu_gather( cont ), 0 );
@@ -87,11 +91,12 @@ divu4_t divu4 (vector unsigned int numer, vector unsigned int denom)
quot = spu_sel( newQuot, quot, skip );
numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
}
while ( anyCont );
}
while ( anyCont );
res.quot = quot;
res.rem = numer;
return res;
res.quot = quot;
res.rem = numer;
return res;
}
#endif
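_divu4 is a restoring shift-subtract divider: the denominator is shifted left until its leading one lines up with the numerator's, then the loop subtracts and shifts while accumulating quotient bits; a zero denominator yields quotient 0 and remainder equal to the numerator. The same loop in scalar C (clz32 is a portable stand-in for spu_cntlz):

#include <stdio.h>

/* Count leading zeros of a 32-bit value. */
static int clz32(unsigned x)
{
    int n = 32;
    while (x) { x >>= 1; n--; }
    return n;
}

/* Restoring shift-subtract division, mirroring the _divu4 loop:
   align denom's leading one with numer's, then subtract and shift. */
static void divu_shift_subtract(unsigned numer, unsigned denom,
                                unsigned *quot, unsigned *rem)
{
    *quot = 0;
    if (denom == 0) { *rem = numer; return; }     /* quot = 0, rem = numer */
    int shift = clz32(denom) - clz32(numer);
    if (shift < 0) { *rem = numer; return; }      /* denom > numer */
    unsigned d = denom << shift;
    unsigned one = 1u << shift;
    while (one) {
        if (numer >= d) { numer -= d; *quot |= one; }
        d >>= 1;
        one >>= 1;
    }
    *rem = numer;
}

int main(void)
{
    unsigned q, r;
    divu_shift_subtract(100, 7, &q, &r);
    printf("q=%u r=%u\n", q, r);   /* q=14 r=2 */
    return 0;
}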

View File

@@ -27,6 +27,8 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXP2F4_H___
#define ___SIMD_MATH_EXP2F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -72,10 +74,10 @@
*/
#define _EXP2F_H_LN2 0.69314718055995f /* ln(2) */
#define __EXP2F_LN2 0.69314718055995f /* ln(2) */
vector float
exp2f4 (vector float x)
static inline vector float
_exp2f4 (vector float x)
{
vec_int4 ix;
vec_uint4 overflow, underflow;
@@ -91,7 +93,7 @@ exp2f4 (vector float x)
bias = (vec_float4)(spu_andc(spu_splats(0x3F7FFFFFu), (vec_uint4)bias));
ix = spu_convts(spu_add(x, bias), 0);
frac = spu_sub(spu_convtf(ix, 0), x);
frac = spu_mul(frac, spu_splats(_EXP2F_H_LN2));
frac = spu_mul(frac, spu_splats(__EXP2F_LN2));
// !!! HRD Replacing the unclear and incorrect overflow handling code
//overflow = spu_sel((vec_uint4)spu_splats(0x7FFFFFFF), (vec_uint4)x, (vec_uchar16)spu_splats(0x80000000));
@@ -99,7 +101,7 @@ exp2f4 (vector float x)
underflow = spu_cmpgt(spu_splats(-126.0f), x);
//exp_int = (vec_float4)(spu_sl(spu_add(ix, 127), 23)); // !!! HRD <- changing this to correct for
// !!! overflow (x >= 127.999999f)
// !!! overflow (x >= 127.999999f)
exp_int = (vec_float4)(spu_sl(spu_add(ix, 126), 23)); // !!! HRD <- add with saturation
exp_int = spu_add(exp_int, exp_int); // !!! HRD
@@ -123,9 +125,11 @@ exp2f4 (vector float x)
result = spu_mul(exp_frac, exp_int);
/* Handle overflow */
result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), (vec_uchar16)overflow);
result = spu_sel(result, (vec_float4)spu_splats(0), (vec_uchar16)underflow);
result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), overflow);
result = spu_sel(result, (vec_float4)spu_splats(0), underflow);
//result = spu_sel(result, (vec_float4)(overflow), spu_cmpgt((vec_uint4)(ix), 255));
return (result);
}
#endif
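exp2f4 splits x into an integer part n and a small fractional part, builds 2^n by writing the biased exponent straight into the IEEE-754 exponent field, approximates the fractional factor with a polynomial, and multiplies the two; the overflow and underflow selects clamp the result afterwards. A rough scalar sketch of that split (the library exp2f call below merely stands in for the polynomial, and over/underflow of n is not handled):

#include <stdio.h>
#include <math.h>
#include <string.h>

/* 2^x = 2^n * 2^f, with n the nearest integer and |f| <= 0.5.
   2^n is built by placing (n + 127) into the exponent field,
   as exp2f4 does with spu_sl(spu_add(ix, ...), 23). */
static float exp2_split(float x)
{
    int n = (int)floorf(x + 0.5f);              /* round to nearest integer */
    float f = x - (float)n;                     /* fractional remainder */
    unsigned bits = (unsigned)(n + 127) << 23;  /* 2^n, assuming no over/underflow */
    float pow2n;
    memcpy(&pow2n, &bits, sizeof pow2n);
    return pow2n * exp2f(f);                    /* exp2f(f) stands in for the polynomial */
}

int main(void)
{
    printf("%f vs %f\n", exp2_split(5.3f), exp2f(5.3f));
    return 0;
}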

View File

@@ -27,37 +27,44 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXPF4_H___
#define ___SIMD_MATH_EXPF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _EXPF_H_C1 ((float)-0.6931470632553101f)
#define _EXPF_H_C2 ((float)-1.1730463525082e-7f)
#include <simdmath/divf4.h>
#include <simdmath/ldexpf4.h>
#define _EXPF_H_INVLN2 ((float)1.4426950408889634f)
#define __EXPF_C1 -0.6931470632553101f
#define __EXPF_C2 -1.1730463525082e-7f
vector float
expf4 (vector float x)
#define __EXPF_INVLN2 1.4426950408889634f
static inline vector float
_expf4 (vector float x)
{
vec_uchar16 xnegmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 goffset = spu_sel(spu_splats((float) 0.5f),spu_splats((float)-0.5f),xnegmask);
vec_float4 g = spu_mul(x, spu_splats(_EXPF_H_INVLN2));
vec_uint4 xnegmask = spu_cmpgt(spu_splats(0.0f), x);
vec_float4 goffset = spu_sel(spu_splats(0.5f),spu_splats(-0.5f),xnegmask);
vec_float4 g = spu_mul(x, spu_splats(__EXPF_INVLN2));
vec_int4 xexp = spu_convts(spu_add(g, goffset),0);
g = spu_convtf(xexp, 0);
g = spu_madd(g, spu_splats(_EXPF_H_C2), spu_madd(g, spu_splats(_EXPF_H_C1), x));
g = spu_madd(g, spu_splats(__EXPF_C2), spu_madd(g, spu_splats(__EXPF_C1), x));
vec_float4 z = spu_mul(g, g);
vec_float4 a = spu_mul(z, spu_splats((float)0.0999748594f));
vec_float4 a = spu_mul(z, spu_splats(0.0999748594f));
vec_float4 b = spu_mul(g,
spu_madd(z,
spu_splats((float)0.0083208258f),
spu_splats((float)0.4999999992f)
spu_splats(0.0083208258f),
spu_splats(0.4999999992f)
)
);
vec_float4 foo = divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
spu_add(spu_splats(1.0f), spu_sub(a, b)));
vec_float4 foo = _divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
spu_add(spu_splats(1.0f), spu_sub(a, b)));
return ldexpf4(foo, xexp);
return _ldexpf4(foo, xexp);
}
#endif
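_expf4 reduces the argument with n = round(x / ln 2) and g = x - n*ln 2, where the two constants __EXPF_C1 and __EXPF_C2 carry ln 2 as a high part plus a low-order correction so the subtraction keeps full precision; e^g is then approximated by a small rational function and the result scaled by 2^n through ldexp. A scalar sketch of the reduction (expf below stands in for the rational approximation):

#include <stdio.h>
#include <math.h>

#define C1 -0.6931470632553101f     /* high part of -ln(2)  */
#define C2 -1.1730463525082e-7f     /* low-order correction */
#define INVLN2 1.4426950408889634f  /* 1/ln(2)              */

/* e^x = 2^n * e^g with |g| <= ln(2)/2.  Subtracting n*ln2 in two
   pieces (C1, C2) keeps the reduced argument accurate. */
static float exp_reduced(float x)
{
    int n = (int)(x * INVLN2 + (x < 0 ? -0.5f : 0.5f));   /* round to nearest */
    float g = ((float)n * C1 + x) + (float)n * C2;        /* g = x - n*ln2 */
    return ldexpf(expf(g), n);     /* expf(g) stands in for the rational approx */
}

int main(void)
{
    printf("%f vs %f\n", exp_reduced(3.7f), expf(3.7f));
    return 0;
}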

View File

@@ -27,28 +27,36 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXPMLF4_H___
#define ___SIMD_MATH_EXPMLF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _EXPM1F_H_ln1by2 ((float)-0.6931471805599f)
#define _EXPM1F_H_ln3by2 ((float) 0.4054651081082f)
#include <simdmath/expf4.h>
#include <simdmath/divf4.h>
vector float
expm1f4 (vector float x)
#define __EXPM1F_ln1by2 -0.6931471805599f
#define __EXPM1F_ln3by2 0.4054651081082f
static inline vector float
_expm1f4 (vector float x)
{
vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(_EXPM1F_H_ln1by2)),
spu_cmpgt(spu_splats(_EXPM1F_H_ln3by2), x));
vec_uint4 nearzeromask = spu_and(spu_cmpgt(x, spu_splats(__EXPM1F_ln1by2)),
spu_cmpgt(spu_splats(__EXPM1F_ln3by2), x));
vec_float4 x2 = spu_mul(x,x);
vec_float4 d0, d1, n0, n1;
d0 = spu_madd(x , spu_splats((float)-0.3203561199f), spu_splats((float)0.9483177697f));
d1 = spu_madd(x2, spu_splats((float) 0.0326527809f), d0);
d0 = spu_madd(x , spu_splats(-0.3203561199f), spu_splats(0.9483177697f));
d1 = spu_madd(x2, spu_splats(0.0326527809f), d0);
n0 = spu_madd(x , spu_splats((float)0.1538026623f), spu_splats((float)0.9483177732f));
n1 = spu_madd(x , spu_splats((float)0.0024490478f), spu_splats((float)0.0305274668f));
n0 = spu_madd(x , spu_splats(0.1538026623f), spu_splats(0.9483177732f));
n1 = spu_madd(x , spu_splats(0.0024490478f), spu_splats(0.0305274668f));
n1 = spu_madd(x2, n1, n0);
return spu_sel(spu_sub(expf4(x), spu_splats(1.0f)),
spu_mul(x, divf4(n1, d1)),
return spu_sel(spu_sub(_expf4(x), spu_splats(1.0f)),
spu_mul(x, _divf4(n1, d1)),
nearzeromask);
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FABSD2_H___
#define ___SIMD_MATH_FABSD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double fabsd2 (vector double x)
static inline vector double
_fabsd2 (vector double x)
{
return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FABSF4_H___
#define ___SIMD_MATH_FABSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float fabsf4 (vector float x)
static inline vector float
_fabsf4 (vector float x)
{
return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) );
return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) );
}
#endif
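Both _fabsd2 and _fabsf4 take the absolute value by clearing the IEEE-754 sign bit with a single and-with-complement, no branches or compares. The same bit trick in scalar C:

#include <stdio.h>
#include <string.h>

/* fabs by clearing bit 31 of the float's bit pattern, as _fabsf4
   does with spu_andc(x, 0x80000000). */
static float fabs_bits(float x)
{
    unsigned u;
    memcpy(&u, &x, sizeof u);
    u &= 0x7fffffffu;            /* drop the sign bit */
    memcpy(&x, &u, sizeof x);
    return x;
}

int main(void)
{
    printf("%f %f\n", fabs_bits(-3.5f), fabs_bits(2.0f));   /* 3.500000 2.000000 */
    return 0;
}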

View File

@@ -27,13 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FDIMD2_H___
#define ___SIMD_MATH_FDIMD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
/* fdimd2 - compute the positive difference of x and y.
*/
vector double
fdimd2 (vector double x, vector double y)
static inline vector double
_fdimd2 (vector double x, vector double y)
{
vec_double2 v;
vec_uint4 mask;
@@ -44,3 +47,5 @@ fdimd2 (vector double x, vector double y)
return (v);
}
#endif

View File

@@ -27,12 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FDIMF4_H___
#define ___SIMD_MATH_FDIMF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fdimf4 (vector float x, vector float y)
static inline vector float
_fdimf4 (vector float x, vector float y)
{
vec_float4 diff = spu_sub(x,y);
return spu_sel(spu_splats(0.0f),diff, spu_cmpgt(x,y));
}
#endif

View File

@@ -27,11 +27,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FLOORD2_H___
#define ___SIMD_MATH_FLOORD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
floord2(vector double in)
static inline vector double
_floord2(vector double in)
{
vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
@@ -74,7 +77,7 @@ floord2(vector double in)
equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
addend = spu_andc(spu_andc(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));
/* insert
/* insert
*/
e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
@@ -92,3 +95,5 @@ floord2(vector double in)
return (out);
}
#endif

View File

@@ -27,28 +27,32 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FLOORF4_H___
#define ___SIMD_MATH_FLOORF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
floorf4 (vector float x)
static inline vector float
_floorf4 (vector float x)
{
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
// Find truncated value and one less.
// Find truncated value and one less.
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, -1 );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, -1 );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
// If truncated value is greater than input, subtract one.
// If truncated value is greater than input, subtract one.
return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) );
return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) );
}
#endif
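_floorf4 truncates toward zero, also forms "truncated minus one", and keeps the latter whenever truncation overshot the input, which only happens for negative non-integers; values with magnitude of at least 2^23 are already integral and pass through via the inrange mask. A scalar rendering of that truncate-and-correct idea:

#include <stdio.h>

/* floor via truncate-and-correct, mirroring _floorf4:
   take trunc(x) and trunc(x)-1, keep the one that is <= x. */
static float floor_trunc(float x)
{
    if (x >= 8388608.0f || x <= -8388608.0f)   /* |x| >= 2^23: already integral */
        return x;
    float t  = (float)(int)x;                  /* truncation toward zero */
    float t1 = t - 1.0f;
    return (t > x) ? t1 : t;                   /* overshoot only for negative non-integers */
}

int main(void)
{
    printf("%f %f %f\n", floor_trunc(2.7f), floor_trunc(-2.7f), floor_trunc(-3.0f));
    /* 2.000000 -3.000000 -3.000000 */
    return 0;
}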

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAD2_H___
#define ___SIMD_MATH_FMAD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
fmad2 (vector double x, vector double y, vector double z)
static inline vector double
_fmad2 (vector double x, vector double y, vector double z)
{
return spu_madd(x,y,z);
}
#endif

View File

@@ -27,12 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAF4_H___
#define ___SIMD_MATH_FMAF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fmaf4 (vector float x, vector float y, vector float z)
static inline vector float
_fmaf4 (vector float x, vector float y, vector float z)
{
return spu_madd(x,y,z);
}
#endif

View File

@@ -27,6 +27,8 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAXD2_H___
#define ___SIMD_MATH_FMAXD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -36,8 +38,8 @@
* is returned.
*/
vector double
fmaxd2 (vector double x, vector double y)
static inline vector double
_fmaxd2 (vector double x, vector double y)
{
vec_ullong2 selector, denorm;
vec_double2 x_offset, y_offset, diff;
@@ -66,3 +68,4 @@ fmaxd2 (vector double x, vector double y)
return spu_sel(x, y, selector);
}
#endif

View File

@@ -27,14 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAXF4_H___
#define ___SIMD_MATH_FMAXF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fmaxf4 (vector float x, vector float y)
static inline vector float
_fmaxf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_cmpgt( y, x ) );
return spu_sel( x, y, spu_cmpgt( y, x ) );
}
#endif

View File

@@ -27,6 +27,9 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMIND2_H___
#define ___SIMD_MATH_FMIND2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -35,8 +38,8 @@
* is returned.
*/
vector double
fmind2 (vector double x, vector double y)
static inline vector double
_fmind2 (vector double x, vector double y)
{
vec_ullong2 selector, denorm;
vec_double2 x_offset, y_offset, diff;
@@ -65,3 +68,4 @@ fmind2 (vector double x, vector double y)
return spu_sel(x, y, selector);
}
#endif

View File

@@ -27,14 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMINF4_H___
#define ___SIMD_MATH_FMINF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fminf4 (vector float x, vector float y)
static inline vector float
_fminf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_cmpgt( x, y ) );
return spu_sel( x, y, spu_cmpgt( x, y ) );
}
#endif

View File

@@ -27,10 +27,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMODD2_H___
#define ___SIMD_MATH_FMODD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/_vec_utils.h>
/*
* a vector is returned that contains the remainder of xi/yi,
* for corresponding elements of vector double x and vector double y,
@@ -41,11 +45,8 @@
* magnitude less than |yi|
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb);
vector double fmodd2(vector double x, vector double y)
static inline vector double
_fmodd2(vector double x, vector double y)
{
int shift0, shift1;
vec_uchar16 swap_words = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
@@ -82,20 +83,20 @@ vector double fmodd2(vector double x, vector double y)
exp_y = spu_rlmask(y_hi, -20);
// y>x
resultx = _vec_gt64(abs_y, abs_x);
resultx = __vec_gt64(abs_y, abs_x);
//is Inf, is Nan
x_7ff = spu_cmpgt(x_hi, spu_splats((unsigned int)0x7fefffff));
x_inf = _vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
x_inf = __vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
x_nan = spu_andc(x_7ff, x_inf);
y_7ff = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7fefffff));
y_inf = _vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
y_inf = __vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
y_nan = spu_andc(y_7ff, y_inf);
// is zero
zero_x = _vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
zero_y = _vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));
zero_x = __vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
zero_y = __vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));
/* Determine ilogb of abs_x and abs_y and
@@ -121,8 +122,8 @@ vector double fmodd2(vector double x, vector double y)
cnt_y = spu_add(spu_shuffle(cnt_y, cnt_y, splat_hi), -11);
/*
mant_x_norm = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
mant_y_norm = spu_andc(spu_sel(implied_1, abs_y, mant_mask), zero_y);
mant_x_norm = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
mant_y_norm = spu_andc(spu_sel(implied_1, abs_y, mant_mask), zero_y);
*/
//norm
mant_x_norm = spu_or(implied_1, frac_x);
@@ -225,8 +226,8 @@ vector double fmodd2(vector double x, vector double y)
shift0 = spu_extract(cnt, 0);
shift1 = spu_extract(cnt, 2);
/*
norm0 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x0, implied_1), shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x1, implied_1), shift1), shift1);
norm0 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x0, implied_1), shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x1, implied_1), shift1), shift1);
*/
norm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
@@ -236,11 +237,11 @@ vector double fmodd2(vector double x, vector double y)
//denorm
/*
shift = spu_add((vec_int4)exp_y, -1);
shift0 = spu_extract(shift, 0);
shift1 = spu_extract(shift, 2);
denorm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
denorm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
shift = spu_add((vec_int4)exp_y, -1);
shift0 = spu_extract(shift, 0);
shift1 = spu_extract(shift, 2);
denorm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
denorm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
*/
shift = spu_add(power, -1);
shift0 = spu_extract(shift, 0);
@@ -278,25 +279,4 @@ vector double fmodd2(vector double x, vector double y)
return ((vec_double2)result);
}
/*
* extend spu_cmpgt function to 64bit data
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 eq = spu_cmpeq(aa, bb);
return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
#endif
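The helpers removed here (now provided as __vec_gt64 and friends via _vec_utils.h) widen the 32-bit compares to 64 bits: the high words decide, and on a tie the low-word comparison, rotated into place with spu_rlqwbyte, is used instead. The same logic written scalar-style over explicit halves:

#include <stdio.h>

/* 64-bit unsigned greater-than assembled from 32-bit halves,
   the idea behind __vec_gt64_half: the high words decide, and
   on a tie the low-word comparison is used. */
static int gt64_from_halves(unsigned a_hi, unsigned a_lo,
                            unsigned b_hi, unsigned b_lo)
{
    int gt_hi = a_hi > b_hi;
    int eq_hi = a_hi == b_hi;
    int gt_lo = a_lo > b_lo;
    return gt_hi || (eq_hi && gt_lo);
}

int main(void)
{
    printf("%d %d %d\n",
           gt64_from_halves(1, 0, 0, 0xffffffffu),   /* 1 */
           gt64_from_halves(1, 2, 1, 3),             /* 0 */
           gt64_from_halves(1, 4, 1, 3));            /* 1 */
    return 0;
}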

View File

@@ -27,60 +27,68 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMODF4_H___
#define ___SIMD_MATH_FMODF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divf4.h>
#include <simdmath/fabsf4.h>
#include <simdmath/copysignf4.h>
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
vector float
fmodf4 (vector float x, vector float y)
static inline vector float
_fmodf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, r1, r2, i;
vec_uint4 inrange;
vec_float4 q, xabs, yabs, qabs, xabs2;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, r1, r2, i;
vec_uint4 inrange;
// Find i = truncated_integer(|x/y|)
// Find i = truncated_integer(|x/y|)
// If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
// Thus i is either the truncated quotient, one less, or one greater.
// If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
// Thus i is either the truncated quotient, one less, or one greater.
q = divf4( x, y );
xabs = fabsf4( x );
yabs = fabsf4( y );
qabs = fabsf4( q );
xabs2 = spu_add( xabs, xabs );
q = _divf4( x, y );
xabs = _fabsf4( x );
yabs = _fabsf4( y );
qabs = _fabsf4( q );
xabs2 = spu_add( xabs, xabs );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
r1 = spu_nmsub( i1, yabs, xabs2 );
r2 = spu_nmsub( i2, yabs, xabs2 );
r1 = spu_nmsub( i1, yabs, xabs2 );
r2 = spu_nmsub( i2, yabs, xabs2 );
i = i0;
i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) );
i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) );
i = i0;
i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) );
i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) );
i = copysignf4( i, q );
i = _copysignf4( i, q );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
#endif
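_fmodf4 forms q = x/y, truncates |q| and also considers its two neighbours, then keeps the largest candidate i with |x| - i*|y| >= 0, tested in the 2|x| - i*|y| >= |x| form so the fused negative-multiply-subtract cannot round the wrong way; the remainder is x - i*y with i given the sign of q. A scalar sketch of the candidate selection, using truncf/copysignf from <math.h> in place of the vector conversions:

#include <stdio.h>
#include <math.h>

/* fmod via candidate quotients, following the _fmodf4 comment:
   i is trunc(|x/y|) or one of its neighbours, chosen as the
   largest value with 2|x| - i*|y| >= |x|. */
static float fmod_candidates(float x, float y)
{
    float q     = x / y;
    float xabs  = fabsf(x), yabs = fabsf(y);
    float xabs2 = xabs + xabs;
    float i1 = truncf(fabsf(q));
    float i0 = i1 - 1.0f, i2 = i1 + 1.0f;
    float i  = i0;
    if (xabs2 - i1 * yabs >= xabs) i = i1;
    if (xabs2 - i2 * yabs >= xabs) i = i2;
    i = copysignf(i, q);            /* give i the sign of the quotient */
    return x - i * y;               /* spu_nmsub(i, y, x) in the SPU version */
}

int main(void)
{
    printf("%f vs %f\n", fmod_candidates(7.5f, 2.0f), fmodf(7.5f, 2.0f));
    printf("%f vs %f\n", fmod_candidates(-7.5f, 2.0f), fmodf(-7.5f, 2.0f));
    return 0;
}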

View File

@@ -0,0 +1,83 @@
/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FPCLASSIFYD2_H___
#define ___SIMD_MATH_FPCLASSIFYD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
static inline vector signed long long
_fpclassifyd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 mask;
vec_llong2 classtype;
vec_uint4 cmpgt, cmpeq;
//FP_NORMAL: normal unless nan, infinity, zero, or denorm
classtype = spu_splats((long long)FP_NORMAL);
//FP_NAN: all-ones exponent and non-zero mantissa
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask );
//FP_INFINITE: all-ones exponent and zero mantissa
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask );
//FP_ZERO: zero exponent and zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero );
cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask );
return classtype;
}
#endif
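All of the classification comes down to two bit fields: an all-ones exponent means NaN or infinity depending on the mantissa, a zero exponent means zero or subnormal, and everything else is normal; the vector code simply evaluates those tests with shuffled 32-bit compares. The same decision tree on a scalar double:

#include <stdio.h>
#include <string.h>
#include <math.h>

/* Classify a double from its bit pattern, the same field tests
   _fpclassifyd2 performs with vector compares. */
static int classify_bits(double x)
{
    unsigned long long u;
    memcpy(&u, &x, sizeof u);
    unsigned long long expn = (u >> 52) & 0x7ff;
    unsigned long long mant = u & 0x000fffffffffffffull;
    if (expn == 0x7ff) return mant ? FP_NAN : FP_INFINITE;
    if (expn == 0)     return mant ? FP_SUBNORMAL : FP_ZERO;
    return FP_NORMAL;
}

int main(void)
{
    printf("%d %d %d\n",
           classify_bits(1.0)    == FP_NORMAL,
           classify_bits(0.0)    == FP_ZERO,
           classify_bits(1e-310) == FP_SUBNORMAL);   /* all print 1 */
    return 0;
}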

View File

@@ -27,52 +27,41 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FPCLASSIFYF4_H___
#define ___SIMD_MATH_FPCLASSIFYF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_NAN
#define FP_NAN (0)
#endif
#ifndef FP_INFINITE
#define FP_INFINITE (1)
#endif
#ifndef FP_ZERO
#define FP_ZERO (2)
#endif
#ifndef FP_SUBNORMAL
#define FP_SUBNORMAL (3)
#endif
#ifndef FP_NORMAL
#define FP_NORMAL (4)
#endif
vector signed int
fpclassifyf4 (vector float x)
static inline vector signed int
_fpclassifyf4 (vector float x)
{
vec_uint4 zero = spu_splats((unsigned int)0x00000000);
vec_uint4 zero = spu_splats((unsigned int)0x00000000);
vec_uint4 mask;
vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff);
vec_int4 classtype = (vec_int4)zero;
vec_uint4 mask;
vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff);
vec_int4 classtype = (vec_int4)zero;
//FP_NAN: NaN not supported on SPU, never return FP_NAN
//FP_NAN: NaN not supported on SPU, never return FP_NAN
//FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE
//FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE
//FP_ZERO: zero exponent and zero mantissa
mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero );
classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask );
unclassified = spu_andc( unclassified, mask );
//FP_ZERO: zero exponent and zero mantissa
mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero );
classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask );
unclassified = spu_andc( unclassified, mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ),
spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) );
classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask );
unclassified = spu_andc( unclassified, mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ),
spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) );
classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask );
unclassified = spu_andc( unclassified, mask );
//FP_NORMAL: none of the above
classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified );
//FP_NORMAL: none of the above
classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified );
return classtype;
return classtype;
}
#endif

View File

@@ -0,0 +1,98 @@
/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FREXPD2_H___
#define ___SIMD_MATH_FREXPD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#define __FREXPD_DBL_NAN 0x7FF8000000000000ull
static inline vector double
_frexpd2 (vector double x, vector signed long long *pexp)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_ullong2 e0, x0, x1;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, sh, ex;
vec_double2 fr, frac = (vec_double2)zero;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) );
isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even );
frac = spu_sel( frac, (vec_double2)spu_splats(__FREXPD_DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
frac = spu_sel( frac, x , isinf );
//x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
frac = spu_sel( frac, (vec_double2)zero , iszero );
*pexp = spu_sel( *pexp, (vec_llong2)zero , iszero );
//Integer Exponent: if x is normal or subnormal
//...shift left to normalize fraction, zero shift if normal
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) );
sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) );
x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) );
x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) );
fr = (vec_double2)spu_sel( x1, x0, maskdw );
fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn );
fr = spu_sel( fr, x, sign );
e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 );
ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) );
frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) );
*pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) );
return frac;
}
#endif

View File

@@ -27,21 +27,26 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FREXPF4_H___
#define ___SIMD_MATH_FREXPF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
frexpf4 (vector float x, vector signed int *pexp)
static inline vector float
_frexpf4 (vector float x, vector signed int *pexp)
{
vec_int4 zeros = spu_splats((int)0);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 e1 = spu_and((vec_int4)x, expmask);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask);
vec_int4 e2 = spu_sub(spu_rlmask(e1,-23), spu_splats((int)126));
*pexp = spu_sel(e2, zeros, zeromask);
vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask);
return spu_sel(m2, (vec_float4)zeros, zeromask);
}
#endif
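_frexpf4 reads the 8-bit exponent field, rebiases it so the returned fraction falls in [0.5, 1), and produces that fraction by overwriting the exponent bits with 0x3F000000, the encoding of 0.5; zero inputs return zero for both outputs (denormals are flushed on SPU). A scalar sketch of the same exponent-field surgery, with specials ignored:

#include <stdio.h>
#include <string.h>

/* frexp for normal floats by exponent-field manipulation, as in
   _frexpf4.  Subnormals, infinities and NaNs are not handled here. */
static float frexp_bits(float x, int *e)
{
    unsigned u;
    memcpy(&u, &x, sizeof u);
    if ((u & 0x7fffffffu) == 0) { *e = 0; return 0.0f; }   /* +-0 */
    *e = (int)((u >> 23) & 0xff) - 126;                    /* unbiased exponent + 1 */
    u = (u & 0x807fffffu) | 0x3f000000u;                   /* force the exponent of 0.5 */
    memcpy(&x, &u, sizeof x);
    return x;
}

int main(void)
{
    int e;
    float m = frexp_bits(12.0f, &e);
    printf("%f * 2^%d\n", m, e);   /* 0.750000 * 2^4 */
    return 0;
}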

View File

@@ -27,14 +27,21 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_HYPOTD2_H___
#define ___SIMD_MATH_HYPOTD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
hypotd2 (vector double x, vector double y)
#include <simdmath/sqrtd2.h>
static inline vector double
_hypotd2 (vector double x, vector double y)
{
vec_double2 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);
return sqrtd2(sum);
return _sqrtd2(sum);
}
#endif

View File

@@ -27,14 +27,21 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_HYPOTF4_H___
#define ___SIMD_MATH_HYPOTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
hypotf4 (vector float x, vector float y)
#include <simdmath/sqrtf4.h>
static inline vector float
_hypotf4 (vector float x, vector float y)
{
vec_float4 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);
return sqrtf4(sum);
return _sqrtf4(sum);
}
#endif

View File

@@ -0,0 +1,83 @@
/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ILOGBD2_H___
#define ___SIMD_MATH_ILOGBD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <limits.h>
#include <math.h>
static inline vector signed long long
_ilogbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, iszeroinf;
vec_llong2 ilogb = (vec_llong2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan );
//FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ),
spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 );
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) );
return ilogb;
}
#endif

View File

@@ -27,22 +27,24 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ILOGBF4_H___
#define ___SIMD_MATH_ILOGBF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <limits.h>
#include <math.h>
#ifndef FP_ILOGB0
#define FP_ILOGB0 ((int)0x80000001)
#endif
vector signed int
ilogbf4 (vector float x)
static inline vector signed int
_ilogbf4 (vector float x)
{
vec_int4 minus127 = spu_splats((int)-127);
vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0);
vec_uint4 zeromask = spu_cmpeq(e1, 0);
vec_int4 e2 = spu_add(spu_rlmask(e1,-23), minus127);
return spu_sel(e2, (vec_int4)spu_splats(FP_ILOGB0), zeromask);
}
#endif
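_ilogbf4 masks out the exponent field, shifts it down and subtracts the bias of 127; a zero field (a zero input, or a denormal, which the SPU flushes) returns FP_ILOGB0. A scalar equivalent:

#include <stdio.h>
#include <string.h>
#include <math.h>

/* ilogb for normal floats by reading the exponent field, as _ilogbf4 does. */
static int ilogb_bits(float x)
{
    unsigned u;
    memcpy(&u, &x, sizeof u);
    unsigned e = u & 0x7f800000u;
    if (e == 0) return FP_ILOGB0;        /* zero (or flushed denormal) */
    return (int)(e >> 23) - 127;
}

int main(void)
{
    printf("%d %d %d\n", ilogb_bits(1.0f), ilogb_bits(12.0f), ilogb_bits(0.25f));
    /* 0 3 -2 */
    return 0;
}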

View File

@@ -30,10 +30,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IRINTF4_H___
#define ___SIMD_MATH_IRINTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int irintf4(vector float in)
static inline vector signed int
_irintf4(vector float in)
{
return spu_convts(in,0);
}
#endif

View File

@@ -29,10 +29,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IROUNDF4_H___
#define ___SIMD_MATH_IROUNDF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int iroundf4(vector float in)
static inline vector signed int
_iroundf4(vector float in)
{
vec_int4 exp, out;
vec_uint4 addend;
@@ -53,3 +57,5 @@ vector signed int iroundf4(vector float in)
return (out);
}
#endif

View File

@@ -27,20 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IS0DENORMD2_H___
#define ___SIMD_MATH_IS0DENORMD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
is0denormd2 (vector double x)
static inline vector unsigned long long
_is0denormd2 (vector double x)
{
vec_double2 xexp;
vec_ullong2 cmp;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_double2 xexp;
vec_ullong2 cmp;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) );
cmp = spu_shuffle( cmp, cmp, even );
xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) );
cmp = spu_shuffle( cmp, cmp, even );
return cmp;
return cmp;
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IS0DENORMF4_H___
#define ___SIMD_MATH_IS0DENORMF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
is0denormf4 (vector float x)
static inline vector unsigned int
_is0denormf4 (vector float x)
{
return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) );
return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) );
}
#endif

View File

@@ -27,28 +27,35 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISEQUALD2_H___
#define ___SIMD_MATH_ISEQUALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isequald2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_isequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero),
spu_or( isnand2( x ), isnand2( y ) ) );
return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero),
spu_or( _isnand2( x ), _isnand2( y ) ) );
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISEQUALF4_H___
#define ___SIMD_MATH_ISEQUALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isequalf4 (vector float x, vector float y)
static inline vector unsigned int
_isequalf4 (vector float x, vector float y)
{
return spu_cmpeq(x, y);
return spu_cmpeq(x, y);
}
#endif

View File

@@ -27,21 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISFINITED2_H___
#define ___SIMD_MATH_ISFINITED2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isfinited2 (vector double x)
static inline vector unsigned long long
_isfinited2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
//Finite unless NaN or Inf, check for 'not all-ones exponent'
//Finite unless NaN or Inf, check for 'not all-ones exponent'
cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
return cmpr;
return cmpr;
}
#endif

View File

@@ -27,14 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISFINITEF4_H___
#define ___SIMD_MATH_ISFINITEF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isfinitef4 (vector float x)
static inline vector unsigned int
_isfinitef4 (vector float x)
{
(void)x;
(void)x;
// NaN, INF not supported on SPU, result always a mask of ones
return spu_splats((unsigned int)0xffffffff);
// NaN, INF not supported on SPU, result always a mask of ones
return spu_splats((unsigned int)0xffffffff);
}
#endif

View File

@@ -27,39 +27,45 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATERD2_H___
#define ___SIMD_MATH_ISGREATERD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isgreaterd2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_isgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}
#endif
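The doubleword compare is stitched together from 32-bit compares: a signed compare of the high words, an unsigned compare of the low words when the high words tie, a flip of the result when both operands are negative, a bothzero mask so +0 and -0 compare equal, and NaN operands forcing a false result. The same ordering rules applied to scalar bit patterns (isnan from <math.h> replaces _isnand2):

#include <stdio.h>
#include <string.h>
#include <math.h>

/* Ordered x > y done on the raw double bit patterns, the idea behind
   _isgreaterd2: integer compares, a flipped sense when both operands
   are negative, +0 == -0, and NaN forcing a false result. */
static int isgreater_bits(double x, double y)
{
    unsigned long long ux, uy;
    memcpy(&ux, &x, sizeof ux);
    memcpy(&uy, &y, sizeof uy);
    if (isnan(x) || isnan(y)) return 0;                       /* unordered */
    if (((ux | uy) & 0x7fffffffffffffffull) == 0) return 0;   /* +0 vs -0 */
    int sx = (int)(ux >> 63), sy = (int)(uy >> 63);
    if (sx != sy) return sy;       /* the non-negative operand is greater */
    if (sx)       return ux < uy;  /* both negative: bit order is reversed */
    return ux > uy;                /* both non-negative: bit order matches */
}

int main(void)
{
    printf("%d %d %d %d\n",
           isgreater_bits(2.0, 1.0),     /* 1 */
           isgreater_bits(-1.0, -2.0),   /* 1 */
           isgreater_bits(-0.0, 0.0),    /* 0 */
           isgreater_bits(1.0, 2.0));    /* 0 */
    return 0;
}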

View File

@@ -27,41 +27,47 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATEREQUALD2_H___
#define ___SIMD_MATH_ISGREATEREQUALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isgreaterequald2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_isgreaterequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ),
spu_or( isnand2 ( x ), isnand2 ( y ) ) );
return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ),
spu_or( _isnand2 ( x ), _isnand2 ( y ) ) );
}
#endif

View File

@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATEREQUALF4_H___
#define ___SIMD_MATH_ISGREATEREQUALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isgreaterequalf4 (vector float x, vector float y)
static inline vector unsigned int
_isgreaterequalf4 (vector float x, vector float y)
{
vec_uint4 var;
vec_uint4 var;
var = spu_cmpgt(y, x);
var = spu_cmpgt(y, x);
return spu_nor(var, var);
return spu_nor(var, var);
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATERF4_H___
#define ___SIMD_MATH_ISGREATERF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isgreaterf4 (vector float x, vector float y)
static inline vector unsigned int
_isgreaterf4 (vector float x, vector float y)
{
return spu_cmpgt(x, y);
return spu_cmpgt(x, y);
}
#endif

View File

@@ -27,21 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISINFD2_H___
#define ___SIMD_MATH_ISINFD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isinfd2 (vector double x)
static inline vector unsigned long long
_isinfd2 (vector double x)
{
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_double2 xabs;
vec_ullong2 cmp;
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_double2 xabs;
vec_ullong2 cmp;
xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) );
cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) );
xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) );
cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) );
return cmp;
return cmp;
}
#endif

View File

@@ -27,14 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISINFF4_H___
#define ___SIMD_MATH_ISINFF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isinff4 (vector float x)
static inline vector unsigned int
_isinff4 (vector float x)
{
(void)x;
// INF not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}
#endif

View File

@@ -27,38 +27,45 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSD2_H___
#define ___SIMD_MATH_ISLESSD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessd2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_islessd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)y, (vec_int4)x );
cmpeq_i = spu_cmpeq( (vec_int4)y, (vec_int4)x );
cmpgt_ui = spu_cmpgt( (vec_uint4)y, (vec_uint4)x );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}
#endif
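
The islessd2 code above builds a 64-bit double comparison out of the SPU's 32-bit integer compares, then patches the two cases where raw bit-pattern ordering and floating-point ordering disagree: two zeros of either sign compare equal, and two negative values compare in reverse. For reference, a scalar sketch of the predicate each slot ends up with (hypothetical helper name, illustration only):

#include <stdint.h>
#include <string.h>

/* Scalar sketch of the per-slot predicate: x < y, false when either operand
   is NaN, with -0.0 treated as equal to +0.0 (illustration only). */
static uint64_t isless_scalar(double x, double y)
{
    int64_t ix, iy;
    memcpy(&ix, &x, sizeof ix);
    memcpy(&iy, &y, sizeof iy);
    if (x != x || y != y)                 /* unordered: NaN never compares less */
        return 0ULL;
    if (((ix | iy) & INT64_MAX) == 0)     /* both zero, regardless of sign */
        return 0ULL;
    if (ix < 0 && iy < 0)                 /* both negative: bit-pattern order is reversed */
        return (iy < ix) ? ~0ULL : 0ULL;
    return (ix < iy) ? ~0ULL : 0ULL;      /* otherwise signed integer order matches */
}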

View File

@@ -27,40 +27,47 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSEQUALD2_H___
#define ___SIMD_MATH_ISLESSEQUALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessequald2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_islessequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll),
spu_or( isnand2 ( x ), isnand2 ( y ) ) );
return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll),
spu_or( _isnand2 ( x ), _isnand2 ( y ) ) );
}
#endif

View File

@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSEQUALF4_H___
#define ___SIMD_MATH_ISLESSEQUALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessequalf4 (vector float x, vector float y)
static inline vector unsigned int
_islessequalf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpgt(x, y);
return spu_nor(var, var);
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSF4_H___
#define ___SIMD_MATH_ISLESSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessf4 (vector float x, vector float y)
static inline vector unsigned int
_islessf4 (vector float x, vector float y)
{
return spu_cmpgt(y, x);
}
#endif

View File

@@ -27,29 +27,35 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSGREATERD2_H___
#define ___SIMD_MATH_ISLESSGREATERD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessgreaterd2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_islessgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}
#endif

View File

@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSGREATERF4_H___
#define ___SIMD_MATH_ISLESSGREATERF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessgreaterf4 (vector float x, vector float y)
static inline vector unsigned int
_islessgreaterf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpeq(x, y);
return spu_nor(var, var);
}
#endif

View File

@@ -27,26 +27,30 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNAND2_H___
#define ___SIMD_MATH_ISNAND2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isnand2 (vector double x)
static inline vector unsigned long long
_isnand2 (vector double x)
{
vec_double2 xneg;
vec_ullong2 cmpgt, cmpeq, cmpnan;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uint4 expmask = (vec_uint4)spu_splats(0xfff0000000000000ull);
xneg = (vec_double2)spu_or( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)xneg, expmask );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)xneg, expmask );
cmpnan = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
return cmpnan;
}
#endif
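
The isnand2 code exploits the IEEE-754 layout: once the sign bit is forced on, a double is NaN exactly when its bit pattern is strictly greater than 0xfff0000000000000 (the pattern of negative infinity); the even/odd shuffles merely rebuild that 64-bit unsigned compare from the SPU's 32-bit compare results. A scalar sketch of the same test (hypothetical helper, illustration only):

#include <stdint.h>
#include <string.h>

static uint64_t isnan_scalar(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    bits |= 0x8000000000000000ull;                 /* force the sign bit on */
    /* NaN: all-ones exponent and non-zero mantissa, i.e. above -infinity's pattern */
    return (bits > 0xfff0000000000000ull) ? ~0ULL : 0ULL;
}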

View File

@@ -27,14 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNANF4_H___
#define ___SIMD_MATH_ISNANF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isnanf4 (vector float x)
static inline vector unsigned int
_isnanf4 (vector float x)
{
(void)x;
// NaN not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}
#endif

View File

@@ -27,23 +27,27 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNORMALD2_H___
#define ___SIMD_MATH_ISNORMALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isnormald2 (vector double x)
static inline vector unsigned long long
_isnormald2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
//Normal unless nan, infinite, denorm, or zero
//Check for 'not zero or all-ones exponent'
cmpr = (vec_ullong2)spu_and( spu_cmpgt( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)spu_splats(0x0000000000000000ull) ),
spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
return cmpr;
}
#endif
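
As the comments above say, a double is normal unless it is NaN, infinite, denormal, or zero, which reduces to one test on the exponent field: it must be neither zero nor all ones. Scalar sketch (hypothetical helper, illustration only):

#include <stdint.h>
#include <string.h>

static uint64_t isnormal_scalar(double x)
{
    uint64_t bits, expfield;
    memcpy(&bits, &x, sizeof bits);
    expfield = bits & 0x7ff0000000000000ull;        /* biased exponent field */
    /* zero exponent -> zero/denormal, all-ones exponent -> infinity/NaN */
    return (expfield != 0 && expfield != 0x7ff0000000000000ull) ? ~0ULL : 0ULL;
}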

View File

@@ -27,12 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNORMALF4_H___
#define ___SIMD_MATH_ISNORMALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isnormalf4 (vector float x)
static inline vector unsigned int
_isnormalf4 (vector float x)
{
// NaN, INF not supported on SPU; normal unless zero
return spu_cmpabsgt(x, (vector float)spu_splats(0x00000000));
}
#endif

View File

@@ -27,37 +27,41 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISUNORDEREDD2_H___
#define ___SIMD_MATH_ISUNORDEREDD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isunorderedd2 (vector double x, vector double y)
static inline vector unsigned long long
_isunorderedd2 (vector double x, vector double y)
{
vec_double2 neg;
vec_ullong2 cmpgt, cmpeq, cmpnanx, cmpnany;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_ullong2 expn = (vec_ullong2)spu_splats(0xfff0000000000000ull);
vec_ullong2 sign = (vec_ullong2)spu_splats(0x8000000000000000ull);
//Check if x is nan
neg = (vec_double2)spu_or( (vec_ullong2)x, sign );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
cmpnanx = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
//Check if y is nan
neg = (vec_double2)spu_or( (vec_ullong2)y, sign );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
cmpnany = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
return spu_or( cmpnanx, cmpnany );
}
#endif

View File

@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISUNORDEREDF4_H___
#define ___SIMD_MATH_ISUNORDEREDF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isunorderedf4 (vector float x, vector float y)
static inline vector unsigned int
_isunorderedf4 (vector float x, vector float y)
{
(void)x;
(void)y;
// NaN not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}
#endif

View File

@@ -29,17 +29,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LDEXPD2_H___
#define ___SIMD_MATH_LDEXPD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
ldexpd2(vector double x, vector signed long long ex)
static inline vector double
_ldexpd2(vector double x, vector signed long long ex)
{
vec_int4 e1, e2;
vec_int4 min = spu_splats(-2099);
// vec_int4 min = spu_splats(-2044);
vec_int4 max = spu_splats( 2098);
// vec_int4 max = spu_splats( 2046);
vec_uint4 cmp_min, cmp_max;
vec_uint4 shift = ((vec_uint4){20, 32, 20, 32});
vec_double2 f1, f2;
@@ -83,7 +86,7 @@ ldexpd2(vector double x, vector signed long long ex)
/* Compute the product x * 2^e1 * 2^e2
*/
// out = spu_mul(spu_mul(x, f1), f2);
// check floating point register DENORM bit
vec_uint4 fpscr0, fpscr;
@@ -159,7 +162,7 @@ ldexpd2(vector double x, vector signed long long ex)
maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 2046));
maxmask = spu_shuffle(maxmask, maxmask, splat_msb);
// maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL)));
minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
minmask = spu_shuffle(minmask, minmask, splat_msb);
@@ -245,9 +248,9 @@ ldexpd2(vector double x, vector signed long long ex)
vec_uint4 mantr = spu_shuffle( mant0r, mant1r, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}));
// select right answer
x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_uchar16)expmask);
x = spu_sel(x, (vec_double2)zeros, minmask);
x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), maxmask);
x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_ullong2)expmask);
x = spu_sel(x, (vec_double2)zeros, (vec_ullong2)minmask);
x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), (vec_ullong2)maxmask);
out = (vec_double2)spu_sel((vec_uint4)x , mantr, mrange);
@@ -260,4 +263,4 @@ ldexpd2(vector double x, vector signed long long ex)
return out;
}
#endif
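
The fragment above scales by computing x * 2^e1 * 2^e2 rather than x * 2^e in one step: the exponent is split so that each partial factor stays representable even when 2^e itself would overflow or underflow a double (the full routine additionally clamps the exponent to [-2099, 2098] and patches denormal results via the FPSCR DENORM bit). A scalar sketch of just the splitting idea, assuming |e| stays within roughly +/-2046:

#include <math.h>

static double ldexp_twostep(double x, int e)
{
    int e1 = e / 2;          /* each half is at most ~1023 in magnitude ... */
    int e2 = e - e1;         /* ... so 2^e1 and 2^e2 are both finite doubles */
    return x * exp2((double)e1) * exp2((double)e2);
}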

View File

@@ -27,26 +27,30 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LDEXPF4_H___
#define ___SIMD_MATH_LDEXPF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
ldexpf4 (vector float x, vector signed int exp)
static inline vector float
_ldexpf4 (vector float x, vector signed int exp)
{
vec_int4 zeros = spu_splats(0);
vec_uchar16 expmask = (vec_uchar16)spu_splats((int)0x7F800000);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask);
vec_int4 e2 = spu_rlmask(e1,-23);
vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 255);
vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-255), exp);
minmask = spu_or (minmask, (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros));
vec_uint4 maxmask = spu_cmpgt(exp, 255);
vec_uint4 minmask = spu_cmpgt(spu_splats(-255), exp);
minmask = spu_or (minmask, spu_cmpeq(x, (vec_float4)zeros));
vec_int4 esum = spu_add(e2, exp);
maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 255));
maxmask = spu_and(maxmask, (vec_uchar16)spu_splats((int)0x7FFFFFFF));
minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
maxmask = spu_or (maxmask, spu_cmpgt(esum, 255));
maxmask = spu_and(maxmask, spu_splats(0x7FFFFFFFU));
minmask = spu_or (minmask, spu_cmpgt(zeros, esum));
x = spu_sel(x, (vec_float4)spu_sl(esum,23), expmask);
x = spu_sel(x, (vec_float4)zeros, minmask);
@@ -54,3 +58,5 @@ ldexpf4 (vector float x, vector signed int exp)
x = spu_sel(x, (vec_float4)maxmask, maxmask);
return x;
}
#endif

View File

@@ -27,11 +27,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLABSI2_H___
#define ___SIMD_MATH_LLABSI2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed long long
llabsi2 (vector signed long long in)
static inline vector signed long long
_llabsi2 (vector signed long long in)
{
vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign = spu_shuffle(sign, sign, ((vec_uchar16){ 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
@@ -43,3 +46,5 @@ llabsi2 (vector signed long long in)
return ((vec_llong2)(res));
}
#endif
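
llabsi2 builds a branch-free 64-bit absolute value: the shown lines broadcast the sign bit across each doubleword, and the elided lines fold it into the value. The usual scalar identity behind this kind of code (hypothetical helper, illustration only, not necessarily the exact formulation used here):

#include <stdint.h>

static int64_t llabs_branchfree(int64_t x)
{
    int64_t mask = x >> 63;        /* 0 when x >= 0, all-ones when x < 0 (arithmetic shift assumed) */
    return (x ^ mask) - mask;      /* identity for x >= 0, two's complement otherwise */
}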

View File

@@ -0,0 +1,85 @@
/* lldivi2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLDIVI2_H___
#define ___SIMD_MATH_LLDIVI2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/_lldiv.h>
#include <simdmath/lldivu2.h>
static inline vector signed long long
__lldivi2_negatell2 (vector signed long long x)
{
vector signed int zero = (vector signed int){0,0,0,0};
vector signed int borrow;
borrow = spu_genb(zero, (vec_int4)x);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}
// lldivi2 - for each of two signed long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
static inline lldivi2_t
_lldivi2 (vector signed long long numer, vector signed long long denom)
{
lldivi2_t res;
lldivu2_t resAbs;
vec_ullong2 numerAbs, denomAbs;
vec_uint4 numerPos, denomPos, quotNeg;
// Determine whether result needs sign change
numerPos = spu_cmpgt((vec_int4)numer, -1);
numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
denomPos = spu_cmpgt((vec_int4)denom, -1);
denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_ullong2)spu_sel(__lldivi2_negatell2(numer), numer, (vec_ullong2)numerPos);
denomAbs = (vec_ullong2)spu_sel(__lldivi2_negatell2(denom), denom, (vec_ullong2)denomPos);
// Divide the absolute values with the unsigned routine.
resAbs = _lldivu2(numerAbs, denomAbs);
res.quot = spu_sel((vec_llong2)resAbs.quot, __lldivi2_negatell2((vec_llong2)resAbs.quot),
(vec_ullong2)quotNeg);
res.rem = spu_sel(__lldivi2_negatell2((vec_llong2)resAbs.rem), (vec_llong2)resAbs.rem,
(vec_ullong2)numerPos);
return res;
}
#endif
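
A minimal usage sketch for the new header, assuming an SPU toolchain with these headers installed; the union is only there to read the two 64-bit slots back out portably, and the values are arbitrary:

#include <simdmath/lldivi2.h>
#include <stdio.h>

int main(void)
{
    union { vector signed long long v; long long s[2]; } num, den, q, r;
    num.s[0] = 100;  num.s[1] = -7;
    den.s[0] = 9;    den.s[1] = 2;

    lldivi2_t res = _lldivi2(num.v, den.v);
    q.v = res.quot;
    r.v = res.rem;

    /* expected: 100/9 -> 11 rem 1, and -7/2 -> -3 rem -1
       (quotient truncates toward zero, remainder keeps the numerator's sign) */
    printf("%lld rem %lld\n", q.s[0], r.s[0]);
    printf("%lld rem %lld\n", q.s[1], r.s[1]);
    return 0;
}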

View File

@@ -27,46 +27,51 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLDIVU2_H___
#define ___SIMD_MATH_LLDIVU2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "lldiv.h"
#include <simdmath/_lldiv.h>
// lldivu2 - for each of two unsigned long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivu2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long denom)
static inline lldivu2_t
_lldivu2 (vector unsigned long long numer, vector unsigned long long denom)
{
lldivu2_t res;
vec_uint4 denomZeros, numerZeros;
vec_int4 shift;
vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
vec_ullong2 quot, newQuot;
vec_ullong2 newNum, skip, cont;
int anyCont;
// Get difference of leading zeros.
denomZeros = (vec_uint4)ll_spu_cntlz( denom );
numerZeros = (vec_uint4)ll_spu_cntlz( numer );
denomZeros = (vec_uint4)__ll_spu_cntlz( denom );
numerZeros = (vec_uint4)__ll_spu_cntlz( numer );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = ll_spu_sl( denom, (vec_ullong2)shift );
oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denom ) );
denomShifted = __ll_spu_sl( denom, (vec_ullong2)shift );
oneShifted = __ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), __ll_spu_cmpeq_zero( denom ) );
// Shift left all leading zeros.
denomLeft = ll_spu_sl( denom, (vec_ullong2)denomZeros );
oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
denomLeft = __ll_spu_sl( denom, (vec_ullong2)denomZeros );
oneLeft = __ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
quot = spu_splats(0ull);
do
{
cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
do
{
cont = __ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );
newQuot = spu_or( quot, oneShifted );
@@ -74,25 +79,26 @@ lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long de
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = ll_spu_cmpgt( denomShifted, numer );
newNum = ll_spu_sub( numer, denomShifted );
skip = __ll_spu_cmpgt( denomShifted, numer );
newNum = __ll_spu_sub( numer, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
numerZeros = (vec_uint4)spu_sel( __ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );
oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
oneShifted = __ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = __ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
quot = spu_sel( newQuot, quot, skip );
numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = quot;
res.rem = numer;
return res;
}
#endif
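
The loop above is a vectorized shift-and-subtract (restoring) division: align the denominator's leading one under the numerator's, subtract wherever it fits, and collect quotient bits on the way down; the spu_gather/spu_extract pair simply keeps both slots iterating until the slower one finishes. A scalar sketch of the same algorithm (hypothetical helper, illustration only; __builtin_clzll is the GCC builtin):

#include <stdint.h>

static void udiv_longhand(uint64_t numer, uint64_t denom,
                          uint64_t *quot, uint64_t *rem)
{
    uint64_t q = 0;
    if (denom != 0 && numer >= denom) {
        int shift = __builtin_clzll(denom) - __builtin_clzll(numer);
        uint64_t d   = denom << shift;   /* leading ones now aligned */
        uint64_t bit = 1ULL  << shift;   /* quotient bit being decided */
        while (bit != 0) {
            if (numer >= d) {            /* subtract shifted denominator when it fits */
                numer -= d;
                q |= bit;
            }
            d   >>= 1;
            bit >>= 1;
        }
    }
    *quot = q;       /* divide by zero: quotient 0 ... */
    *rem  = numer;   /* ... and remainder = numerator, as documented above */
}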

View File

@@ -28,6 +28,9 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLRINTD2_H___
#define ___SIMD_MATH_LLRINTD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -35,8 +38,8 @@
// Handles no exceptions
// overflow will return unspecified data
vector signed long long
llrintd2 (vector double in)
static inline vector signed long long
_llrintd2 (vector double in)
{
int shift0, shift1;
vec_uchar16 splat_msb = ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8});
@@ -67,8 +70,8 @@ llrintd2 (vector double in)
bias = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large);
bias = spu_sel(bias, xx, (vec_ullong2)spu_splats(0x8000000000000000ULL));
// bias = spu_sel((vec_double2)((vec_ullong2)spu_splats(0x4330000000000000ULL)), xx,
// (vec_ullong2)spu_splats(0x8000000000000000ULL));
mant = (vec_uint4)(spu_sub(spu_add(xx, bias), bias));
/* Determine how many bits to shift the mantissa to correctly
@@ -102,9 +105,11 @@ llrintd2 (vector double in)
mant = spu_xor(mant, sign);
borrow = spu_genb(mant, sign);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
4,5,6,7, 192,192,192,192,
12,13,14,15, 192,192,192,192}));
mant = spu_subx(mant, sign, borrow);
return ((vec_llong2)(mant));
}
#endif
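
The bias selected above (0x4330000000000000, i.e. 2^52, taken with x's sign) implements the classic add-and-subtract rounding trick: for |x| < 2^52, adding the bias leaves no fraction bits, so the addition rounds x to an integer in the current rounding mode, and subtracting the bias back recovers it; the rest of the routine then shifts the mantissa into a 64-bit integer and applies the sign. Scalar sketch of just the bias step (illustration only; build without value-changing FP optimizations):

#include <math.h>

static double round_by_bias(double x)
{
    const double two52 = 4503599627370496.0;        /* 2^52 = 0x4330000000000000 */
    if (fabs(x) >= two52)
        return x;                                   /* already integral */
    volatile double t = x + copysign(two52, x);     /* fraction rounded off here */
    return t - copysign(two52, x);
}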

View File

@@ -28,6 +28,9 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLRINTF4_H___
#define ___SIMD_MATH_LLRINTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -35,8 +38,8 @@
// Handles no exceptions
// overflow will return unspecified data
llroundf4_t
llrintf4 (vector float in)
static inline llroundf4_t
_llrintf4 (vector float in)
{
llroundf4_t res;
vec_int4 exp;
@@ -100,3 +103,5 @@ llrintf4 (vector float in)
return res;
}
#endif

View File

@@ -28,6 +28,9 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLROUNDD2_H___
#define ___SIMD_MATH_LLROUNDD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -35,8 +38,8 @@
// Handles no exceptions
// overflow will return unspecified data
vector signed long long
llroundd2 (vector double in)
static inline vector signed long long
_llroundd2 (vector double in)
{
int shift0, shift1;
vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8};
@@ -72,7 +75,7 @@ llroundd2 (vector double in)
*/
addend = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend = spu_rlmask(addend, -7);
// addend = spu_and(spu_rlqw(mant, 1), ((vec_uint4){ 0,1,0,1}));
mant = spu_addx(mant, addend, spu_rlqwbyte(spu_genc(mant, addend), 4));
/* Compute the two's complement of the mantissa if the
@@ -84,9 +87,11 @@ llroundd2 (vector double in)
mant = spu_xor(mant, sign);
borrow = spu_genb(mant, sign);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
4,5,6,7, 192,192,192,192,
12,13,14,15, 192,192,192,192}));
mant = spu_subx(mant, sign, borrow);
return ((vec_llong2)(mant));
}
#endif
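
llroundd2 differs from llrintd2 in the rounding rule: llround rounds halfway cases away from zero, which the addend computed above supplies by extracting the first fraction bit (the 0.5 bit) and adding it to the shifted mantissa before the sign is applied. A scalar sketch of that rule, valid for magnitudes that fit in a long long (hypothetical helper, illustration only):

static long long llround_scalar(double x)
{
    double ax = (x < 0.0) ? -x : x;
    unsigned long long whole = (unsigned long long)ax;        /* truncated magnitude */
    unsigned long long half  = (ax - (double)whole >= 0.5);   /* the 0.5 bit */
    long long mag = (long long)(whole + half);
    return (x < 0.0) ? -mag : mag;
}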

View File

@@ -28,6 +28,9 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLROUNDF4_H___
#define ___SIMD_MATH_LLROUNDF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -35,8 +38,8 @@
// Handles no exceptions
// overflow will return unspecified data
llroundf4_t
llroundf4 (vector float in)
static inline llroundf4_t
_llroundf4 (vector float in)
{
llroundf4_t res;
vec_int4 exp;
@@ -90,8 +93,8 @@ llroundf4 (vector float in)
addend1 = spu_shuffle(mant2, mant3, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend0 = spu_rlmask(addend0, -7);
addend1 = spu_rlmask(addend1, -7);
// addend0 = spu_and(spu_rlqw(res0, 1), ((vec_uint4){ 0,1,0,1}));
// addend1 = spu_and(spu_rlqw(res1, 1), ((vec_uint4){ 0,1,0,1}));
res0 = spu_addx(res0, addend0, spu_rlqwbyte(spu_genc(res0, addend0), 4));
res1 = spu_addx(res1, addend1, spu_rlqwbyte(spu_genc(res1, addend1), 4));
@@ -113,3 +116,5 @@ llroundf4 (vector float in)
return res;
}
#endif

View File

@@ -27,53 +27,57 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG10F4_H___
#define ___SIMD_MATH_LOG10F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divf4.h>
#define _LOG10F_H_loga2msb ((float)0.3010299205780f)
#define _LOG10F_H_loga2lsb ((float)7.5085978266e-8f)
#define _LOG10F_H_logaemsb ((float)0.4342944622040f)
#define _LOG10F_H_logaelsb ((float)1.9699272335e-8f)
#define _LOG10F_H_logae ((float)0.4342944819033f)
#define __LOG10F_loga2msb 0.3010299205780f
#define __LOG10F_loga2lsb 7.5085978266e-8f
#define __LOG10F_logaemsb 0.4342944622040f
#define __LOG10F_logaelsb 1.9699272335e-8f
#define __LOG10F_logae 0.4342944819033f
#define _LOG10F_H_c0 ((float)(0.2988439998f))
#define _LOG10F_H_c1 ((float)(0.3997655209f))
#define _LOG10F_H_c2 ((float)(0.6666679125f))
#define __LOG10F_c0 0.2988439998f
#define __LOG10F_c1 0.3997655209f
#define __LOG10F_c2 0.6666679125f
vector float
log10f4 (vector float x)
static inline vector float
_log10f4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, (vec_int4)expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask);
vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = divf4 (x1, spu_add(x, ones));
vec_float4 z = _divf4 (x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(_LOG10F_H_c0), w, spu_splats(_LOG10F_H_c1));
polyw = spu_madd(polyw , w, spu_splats(_LOG10F_H_c2));
polyw = spu_madd(spu_splats(__LOG10F_c0), w, spu_splats(__LOG10F_c1));
polyw = spu_madd(polyw , w, spu_splats(__LOG10F_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 wnew = spu_convtf(xexp,0);
vec_float4 zz1 = spu_madd(spu_splats(_LOG10F_H_logaemsb), x1,
spu_mul(spu_splats(_LOG10F_H_loga2msb),wnew));
vec_float4 zz2 = spu_madd(spu_splats(_LOG10F_H_logaelsb), x1,
spu_madd(spu_splats(_LOG10F_H_loga2lsb), wnew,
spu_mul(spu_splats(_LOG10F_H_logae), yneg))
vec_float4 zz1 = spu_madd(spu_splats(__LOG10F_logaemsb), x1,
spu_mul(spu_splats(__LOG10F_loga2msb),wnew));
vec_float4 zz2 = spu_madd(spu_splats(__LOG10F_logaelsb), x1,
spu_madd(spu_splats(__LOG10F_loga2lsb), wnew,
spu_mul(spu_splats(__LOG10F_logae), yneg))
);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
#endif
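
The constants above encode the identity the routine evaluates: with x reduced to m * 2^e and m kept in [sqrt(0.5), sqrt(2)), log10(x) = e*log10(2) + log10(e)*ln(m), where ln(m) comes from the polynomial in z = (m-1)/(m+1); splitting log10(2) and log10(e) into msb/lsb pairs recovers a little extra precision in the final sum. A scalar sketch of the decomposition, using logf in place of the polynomial (illustration only):

#include <math.h>

static float log10_decomposed(float x)
{
    int e;
    float m = frexpf(x, &e);      /* x = m * 2^e with m in [0.5, 1) */
    if (m < 0.7071067811865f) {   /* renormalize m into [sqrt(0.5), sqrt(2)) */
        m *= 2.0f;
        e -= 1;
    }
    return (float)e * 0.30102999566f     /* e * log10(2) */
         + 0.43429448190f * logf(m);     /* log10(e) * ln(m) */
}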

View File

@@ -27,25 +27,34 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG1PF4_H___
#define ___SIMD_MATH_LOG1PF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
log1pf4 (vector float x)
#include <simdmath/logf4.h>
#include <simdmath/divf4.h>
static inline vector float
_log1pf4 (vector float x)
{
vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(-0.5f)),
spu_cmpgt(spu_splats(0.5f), x));
vec_uint4 nearzeromask = spu_and(spu_cmpgt(x, spu_splats(-0.5f)),
spu_cmpgt(spu_splats(0.5f), x));
vec_float4 x2 = spu_mul(x,x);
vec_float4 d0, d1, n0, n1;
d0 = spu_madd(x , spu_splats((float)1.5934420741f), spu_splats((float)0.8952856868f));
d1 = spu_madd(x , spu_splats((float)0.1198195734f), spu_splats((float)0.8377145063f));
d0 = spu_madd(x , spu_splats(1.5934420741f), spu_splats(0.8952856868f));
d1 = spu_madd(x , spu_splats(0.1198195734f), spu_splats(0.8377145063f));
d1 = spu_madd(x2, d1, d0);
n0 = spu_madd(x , spu_splats((float)1.1457993413f), spu_splats((float)0.8952856678f));
n1 = spu_madd(x , spu_splats((float)0.0082862580f), spu_splats((float)0.3394238808f));
n0 = spu_madd(x , spu_splats(1.1457993413f), spu_splats(0.8952856678f));
n1 = spu_madd(x , spu_splats(0.0082862580f), spu_splats(0.3394238808f));
n1 = spu_madd(x2, n1, n0);
return spu_sel(logf4(spu_add(x, spu_splats(1.0f))),
spu_mul(x, divf4(n1, d1)),
return spu_sel(_logf4(spu_add(x, spu_splats(1.0f))),
spu_mul(x, _divf4(n1, d1)),
nearzeromask);
}
#endif

View File

@@ -27,45 +27,52 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG2F4_H___
#define ___SIMD_MATH_LOG2F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _LOG2F_H_l2emsb ((float)1.4426950216293f)
#define _LOG2F_H_l2elsb ((float)1.9259629911e-8f)
#define _LOG2F_H_l2e ((float)1.4426950408890f)
#include <simdmath/divf4.h>
#define _LOG2F_H_c0 ((float)(0.2988439998f))
#define _LOG2F_H_c1 ((float)(0.3997655209f))
#define _LOG2F_H_c2 ((float)(0.6666679125f))
#define __LOG2F_l2emsb 1.4426950216293f
#define __LOG2F_l2elsb 1.9259629911e-8f
#define __LOG2F_l2e 1.4426950408890f
vector float
log2f4 (vector float x)
#define __LOG2F_c0 0.2988439998f
#define __LOG2F_c1 0.3997655209f
#define __LOG2F_c2 0.6666679125f
static inline vector float
_log2f4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uint4)expmask);
vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = divf4(x1, spu_add(x, ones));
vec_float4 z = _divf4(x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(_LOG2F_H_c0), w, spu_splats(_LOG2F_H_c1));
polyw = spu_madd(polyw , w, spu_splats(_LOG2F_H_c2));
polyw = spu_madd(spu_splats(__LOG2F_c0), w, spu_splats(__LOG2F_c1));
polyw = spu_madd(polyw , w, spu_splats(__LOG2F_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 zz1 = spu_madd(spu_splats(_LOG2F_H_l2emsb), x1, spu_convtf(xexp,0));
vec_float4 zz2 = spu_madd(spu_splats(_LOG2F_H_l2elsb), x1,
spu_mul(spu_splats(_LOG2F_H_l2e), yneg)
vec_float4 zz1 = spu_madd(spu_splats(__LOG2F_l2emsb), x1, spu_convtf(xexp,0));
vec_float4 zz2 = spu_madd(spu_splats(__LOG2F_l2elsb), x1,
spu_mul(spu_splats(__LOG2F_l2e), yneg)
);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
#endif

View File

@@ -0,0 +1,86 @@
/* logbd2 - for each element of vector x, return the exponent of normalized double x' as floating point value
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOGBD2_H___
#define ___SIMD_MATH_LOGBD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
static inline vector double
_logbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_double2 logb = (vec_double2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
logb = spu_sel( logb, (vec_double2)spu_splats(0x7FF8000000000000ll), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats(__builtin_huge_val()), isinf );
//ZERO: x is zero (zero exponent and zero mantissa), return -HUGE_VAL
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats(-__builtin_huge_val()), iszero );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20);
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ),
spu_nor( isnan, spu_or( isinf, iszero ) ) );
return logb;
}
#endif
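
The three selects above implement the special cases spelled out in the comments (NaN stays NaN, infinities give HUGE_VAL, zero gives -HUGE_VAL), and the leading-zero arithmetic makes subnormals report the exponent their normalized value would have. A scalar sketch of the same case split (hypothetical helper, illustration only):

#include <math.h>
#include <stdint.h>
#include <string.h>

static double logb_scalar(double x)
{
    uint64_t bits, mag;
    memcpy(&bits, &x, sizeof bits);
    mag = bits & 0x7fffffffffffffffull;
    int biased = (int)(mag >> 52);

    if (biased == 0x7ff)                                       /* all-ones exponent */
        return (mag > 0x7ff0000000000000ull) ? x : HUGE_VAL;   /* NaN stays NaN, +/-inf -> +inf */
    if (mag == 0)
        return -HUGE_VAL;                                      /* +/-0 */
    if (biased == 0) {                                         /* subnormal: normalize first */
        int extra = 0;
        while (!(mag & 0x0010000000000000ull)) { mag <<= 1; ++extra; }
        return (double)(-1022 - extra);
    }
    return (double)(biased - 1023);                            /* normal: unbiased exponent */
}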

View File

@@ -27,20 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOGBF4_H___
#define ___SIMD_MATH_LOGBF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef HUGE_VALF
#define HUGE_VALF __builtin_huge_valf ()
#endif
vector float
logbf4 (vector float x)
static inline vector float
_logbf4 (vector float x)
{
vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0);
vec_uint4 zeromask = spu_cmpeq(e1, 0);
e1 = spu_sub(e1, spu_splats((int)0x3F800000));
return spu_sel(spu_convtf(e1,23), (vec_float4)spu_splats(-HUGE_VALF), zeromask);
}
#endif

Some files were not shown because too many files have changed in this diff.