Added SCE SIMD math library in Extras/simdmathlibrary

The upcoming vectormath library that will be used to speed up the SPU version of Extras/BulletMultiThreaded depends on this.
ejcoumans
2007-07-23 04:58:24 +00:00
parent 685138d033
commit 7529cdb3f6
287 changed files with 32064 additions and 0 deletions

View File

@@ -0,0 +1,131 @@
# make file to build the libsimdmath library for SPU
# Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms,
# with or without modification, are permitted provided that the
# following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Sony Computer Entertainment Inc nor the names
# of its contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# To add a file, all you need to do is edit OBJS; the rest will just work
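# Example (illustrative): a hypothetical newfuncf4.c would be picked up by
# appending newfuncf4.o to the OBJS list below; the %.o: %.c rule handles the rest.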
prefix = /usr
DESTDIR =
OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o isinfd2.o \
is0denormf4.o is0denormd2.o recipd2.o divd2.o tand2.o sqrtf4.o absi4.o sqrtd2.o \
sinf4.o isgreaterd2.o sind2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \
rsqrtd2.o copysignf4.o remainderf4.o recipf4.o copysignd2.o log2f4.o \
negatef4.o negated2.o modff4.o asinf4.o frexpf4.o frexpd2.o ldexpf4.o cbrtf4.o \
cosd2.o cosf4.o hypotf4.o hypotd2.o ceilf4.o fmaf4.o fmaxf4.o fminf4.o floorf4.o \
fdimf4.o fmodf4.o negatei4.o logf4.o log1pf4.o log10f4.o expm1f4.o \
expf4.o divi4.o exp2f4.o powf4.o atanf4.o atan2f4.o acosf4.o ilogbf4.o ilogbd2.o \
logbf4.o logbd2.o llroundd2.o llroundf4.o llrintf4.o isequalf4.o isequald2.o \
islessgreaterf4.o islessgreaterd2.o islessf4.o islessd2.o isgreaterf4.o \
isgreaterd2.o islessequalf4.o islessequald2.o isgreaterequalf4.o isgreaterequald2.o \
isfinitef4.o isfinited2.o isnormalf4.o isnormald2.o isunorderedf4.o isunorderedd2.o \
llrintd2.o roundf4.o rintf4.o irintf4.o iroundf4.o fmad2.o fmaxd2.o fmind2.o fdimd2.o \
nextafterd2.o fpclassifyf4.o fpclassifyd2.o nearbyintd2.o nextafterf4.o nearbyintf4.o \
llabsi2.o truncd2.o roundd2.o rintd2.o negatell2.o divu4.o modfd2.o lldivu2.o \
ceild2.o floord2.o ldexpd2.o scalbnf4.o scalbllnd2.o lldivi2.o remquof4.o remquod2.o \
fmodd2.o remainderd2.o
INCLUDES_SPU = -I../
CROSS_SPU = spu-
AR_SPU = $(CROSS_SPU)ar
CC_SPU = $(CROSS_SPU)gcc
CXX_SPU = $(CROSS_SPU)g++
RANLIB_SPU = $(CROSS_SPU)ranlib
TEST_CMD_SPU =
CFLAGS_SPU=$(INCLUDES_SPU) -O2 -W -Wall
INSTALL = install
MAKE_DEFS = \
prefix='$(prefix)' \
DESTDIR='$(DESTDIR)' \
LIB_BASE='$(LIB_BASE)' \
LIB_NAME='$(LIB_NAME)' \
STATIC_LIB='$(STATIC_LIB)' \
CROSS_SPU='$(CROSS_SPU)' \
AR_SPU='$(AR_SPU)' \
CC_SPU='$(CC_SPU)' \
CXX_SPU='$(CXX_SPU)' \
RANLIB_SPU='$(RANLIB_SPU)' \
TEST_CMD_SPU='$(TEST_CMD_SPU)' \
INSTALL='$(INSTALL)'
LIB_BASE = simdmath
LIB_NAME = lib$(LIB_BASE)
STATIC_LIB = $(LIB_NAME).a
all: $(STATIC_LIB)

$(STATIC_LIB): $(OBJS)
	$(AR_SPU) cr $@ $(OBJS)
	$(RANLIB_SPU) $@

install: $(STATIC_LIB)
	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/include
	$(INSTALL) -m 644 ../simdmath.h $(DESTDIR)$(prefix)/spu/include/
	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/lib
	$(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix)/spu/lib/$(STATIC_LIB)

clean:
	cd tests; $(MAKE) $(MAKE_DEFS) clean
	rm -f $(OBJS)
	rm -f $(STATIC_LIB)

$(OBJS): ../simdmath.h

check: $(STATIC_LIB)
	cd tests; $(MAKE) $(MAKE_DEFS); $(MAKE) $(MAKE_DEFS) check

# Some objects have special header files.
sinf4.o sind2.o sincosf4.o cosd2.o: sincos_c.h
lldivu2.o lldivi2.o: lldiv.h

%.o: %.c
	$(CC_SPU) $(CFLAGS_SPU) -c $<

#----------
# C++
#----------
%.o: %.C
	$(CXX_SPU) $(CFLAGS_SPU) -c $<

%.o: %.cpp
	$(CXX_SPU) $(CFLAGS_SPU) -c $<

%.o: %.cc
	$(CXX_SPU) $(CFLAGS_SPU) -c $<

%.o: %.cxx
	$(CXX_SPU) $(CFLAGS_SPU) -c $<

View File

@@ -0,0 +1,40 @@
/* absi4 - for each of four integer slots, compute absolute value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int
absi4 (vector signed int x)
{
vec_int4 neg;
neg = spu_sub( 0, x );
return spu_sel( neg, x, spu_cmpgt( x, -1 ) );
}
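For reference, the same select-based conditional negate as a scalar sketch (illustrative only, not part of the commit):

static inline int abs_sketch(int x)
{
    int neg = 0 - x;             /* mirrors spu_sub( 0, x ) */
    return (x > -1) ? x : neg;   /* mirrors spu_sel with the x > -1 mask */
}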

View File

@@ -0,0 +1,78 @@
/* acosf4 - Computes the inverse cosine of all four float slots of x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Computes the inverse cosine of all four slots of x
//
vector float
acosf4 (vector float x)
{
vec_float4 result, xabs;
vec_float4 t1;
vec_float4 xabs2, xabs4;
vec_float4 hi, lo;
vec_float4 neg, pos;
vec_uint4 select;
xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1));
select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31));
t1 = sqrtf4(spu_sub( ((vec_float4){1.0, 1.0, 1.0, 1.0}) , xabs));
/* Instruction counts can be reduced if the polynomial is
 * computed entirely from nested (dependent) fma's. However,
 * to reduce the number of pipeline stalls, the polynomial is evaluated
 * in two halves (hi and lo).
 */
xabs2 = spu_mul(xabs, xabs);
xabs4 = spu_mul(xabs2, xabs2);
hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f));
hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f));
hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f));
lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f));
lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f));
lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f));
result = spu_madd(hi, xabs4, lo);
/* Adjust the result if x is negative.
 */
neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f));
pos = spu_mul(t1, result);
result = spu_sel(pos, neg, select);
return result;
}
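A scalar transliteration of the same scheme may help when reading the intrinsics (an illustrative sketch; assumes the standard C math library):

#include <math.h>

static float acosf_sketch(float x)
{
    float xa = fabsf(x);
    float t1 = sqrtf(1.0f - xa);
    float x2 = xa * xa, x4 = x2 * x2;
    /* hi and lo halves of the degree-7 polynomial, as in acosf4 */
    float hi = ((-0.0012624911f * xa + 0.0066700901f) * xa
                - 0.0170881256f) * xa + 0.0308918810f;
    float lo = ((-0.0501743046f * xa + 0.0889789874f) * xa
                - 0.2145988016f) * xa + 1.5707963050f;
    float p = hi * x4 + lo;
    /* reflect about pi for negative inputs */
    return (x < 0.0f) ? 3.1415926535898f - t1 * p : t1 * p;
}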

View File

@@ -0,0 +1,85 @@
/* asinf4 - Computes the inverse sine of all four slots of x
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
asinf4 (vector float x)
{
// positive = (x > 0)
//
vec_uchar16 positive = (vec_uchar16)spu_cmpgt(x,spu_splats(0.0f));
// gtHalf = (|x| > 0.5)
//
vec_uchar16 gtHalf = (vec_uchar16)spu_cmpabsgt(x,spu_splats(0.5f));
// x = absf(x)
//
x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff));
// if (x > 0.5)
// g = 0.5 - 0.5*x
// x = -2 * sqrtf(g)
// else
// g = x * x
//
vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf);
x = spu_sel(x,spu_mul(spu_splats(-2.0f),sqrtf4(g)),gtHalf);
// Compute the polynomials and take their ratio
// denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
// num = x * g * (-0.504400557f * g + 0.933933258f)
//
vec_float4 denom = spu_add(g,spu_splats(-5.54846723f));
vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f));
denom = spu_madd(denom,g,spu_splats(5.603603363f));
num = spu_mul(spu_mul(x,g),num);
// x = x + num / denom
//
x = spu_add(x,divf4(num,denom));
// if (x > 0.5)
// x = x + M_PI_2
//
x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf);
// if (!positive) x = -x
//
x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive);
return x;
}
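The inline pseudocode above corresponds to this scalar sketch (illustrative; standard C math library assumed):

#include <math.h>

static float asinf_sketch(float x)
{
    int positive = (x > 0.0f);
    float xa = fabsf(x);
    int gtHalf = (xa > 0.5f);
    float g, xr;
    if (gtHalf) {     /* reduce via asin(x) = pi/2 - 2*asin(sqrt((1-x)/2)) */
        g  = 0.5f - 0.5f * xa;
        xr = -2.0f * sqrtf(g);
    } else {
        g  = xa * xa;
        xr = xa;
    }
    float denom = (g - 5.54846723f) * g + 5.603603363f;
    float num   = xr * g * (-0.504400557f * g + 0.933933258f);
    xr += num / denom;
    if (gtHalf) xr += 1.57079632679489661923f;
    return positive ? xr : -xr;
}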

View File

@@ -0,0 +1,60 @@
/* atan2f4 - Computes the inverse tangent of y/x for each of four float slots, using the signs of both to determine the quadrant.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Inverse tangent function of two variables
//
vector float
atan2f4 (vector float y, vector float x)
{
vec_float4 res = atanf4(divf4(y,x));
// Use the arguments to determine the quadrant of the result:
// if (x < 0)
// if (y < 0)
// res = -PI + res
// else
// res = PI + res
//
vec_uchar16 yNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),y);
vec_uchar16 xNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),x);
vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg);
vec_float4 newRes = spu_add(bias, res);
res = spu_sel(res,newRes,xNeg);
return res;
}
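The quadrant fix-up reduces to a few scalar lines (illustrative sketch; atanf stands in for atanf4):

#include <math.h>

static float atan2f_sketch(float y, float x)
{
    float res = atanf(y / x);
    if (x < 0.0f)                      /* wrong quadrant: shift by +/- pi */
        res += (y < 0.0f) ? -3.14159265358979323846f
                          :  3.14159265358979323846f;
    return res;
}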

View File

@@ -0,0 +1,76 @@
/* atanf4 - Computes the inverse tangent of all four float slots of x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Computes the inverse tangent of all four slots of x.
//
vector float
atanf4 (vector float x)
{
vec_float4 bias;
vec_float4 x2, x3, x4, x8, x9;
vec_float4 hi, lo;
vec_float4 result;
vec_float4 inv_x;
vec_uint4 sign;
vec_uint4 select;
sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31);
inv_x = recipf4(x);
inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u));
select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f));
bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f)));
bias = (vec_float4)spu_and((vec_uint4)bias, select);
x = spu_sel(x, inv_x, select);
bias = spu_add(bias, x);
x2 = spu_mul(x, x);
x3 = spu_mul(x2, x);
x4 = spu_mul(x2, x2);
x8 = spu_mul(x4, x4);
x9 = spu_mul(x8, x);
hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
lo = spu_madd(lo, x3, bias);
result = spu_madd(hi, x9, lo);
return result;
}
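A scalar sketch of the same range reduction and split polynomial (illustrative; 1.0f/x stands in for the recipf4 estimate):

#include <math.h>

static float atanf_sketch(float x)
{
    float bias = 0.0f;
    if (fabsf(x) > 1.0f) {    /* atan(x) = +/- pi/2 + atan(-1/x) for |x| > 1 */
        bias = (x < 0.0f) ? -1.57079632679489661923f
                          :  1.57079632679489661923f;
        x = -1.0f / x;
    }
    float x2 = x * x, x3 = x2 * x, x9 = x3 * x3 * x3;
    float hi = (((0.0028662257f * x2 - 0.0161657367f) * x2
                 + 0.0429096138f) * x2 - 0.0752896400f) * x2 + 0.1065626393f;
    float lo = ((-0.1420889944f * x2 + 0.1999355085f) * x2
                - 0.3333314528f) * x3 + (bias + x);
    return hi * x9 + lo;
}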

View File

@@ -0,0 +1,105 @@
/* cbrtf4 - for each of four float slots, compute the cube root.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define __calcQuot(xexp) n = xexp; \
vec_uchar16 negxexpmask = (vec_uchar16)spu_cmpgt(spu_splats(0), n); \
n = spu_sel(n, spu_add(n,2), negxexpmask); \
\
quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4)); \
quot = spu_add(quot, spu_rlmaska(quot, -4)); \
quot = spu_add(quot, spu_rlmaska(quot, -8)); \
quot = spu_add(quot, spu_rlmaska(quot,-16)); \
vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1)); \
quot = spu_add( \
quot, \
spu_rlmaska( \
spu_add( \
spu_add(r,5), \
spu_sl (r,2) \
), \
-4 \
) \
); \
#define _CBRTF_H_cbrt2 1.2599210498948731648 // 2^(1/3)
#define _CBRTF_H_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
vector float
cbrtf4 (vector float x)
{
vec_float4 zeros = spu_splats(0.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, zeros);
vec_int4 xexp, n;
vec_float4 sgnmask = (vec_float4)spu_splats(0x7FFFFFFF);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
x = spu_and(x, sgnmask);
x = frexpf4(x, &xexp);
vec_float4 p = spu_madd(
spu_madd(x, spu_splats(-0.191502161678719066f), spu_splats(0.697570460207922770f)),
x,
spu_splats(0.492659620528969547f)
);
vec_float4 p3 = spu_mul(p, spu_mul(p, p));
vec_int4 quot;
__calcQuot(xexp);
vec_int4 modval = spu_sub(spu_sub(xexp,quot), spu_sl(quot,1)); // mod = xexp - 3*quotient
vec_float4 factor = spu_splats((float)(1.0/_CBRTF_H_sqr_cbrt2));
factor = spu_sel(factor, spu_splats((float)(1.0/_CBRTF_H_cbrt2)), spu_cmpeq(modval,-1));
factor = spu_sel(factor, spu_splats((float)( 1.0)), spu_cmpeq(modval, 0));
factor = spu_sel(factor, spu_splats((float)( _CBRTF_H_cbrt2)), spu_cmpeq(modval, 1));
factor = spu_sel(factor, spu_splats((float)(_CBRTF_H_sqr_cbrt2)), spu_cmpeq(modval, 2));
vec_float4 pre = spu_mul(p, factor);
vec_float4 numr = spu_madd(x , spu_splats(2.0f), p3);
vec_float4 denr = spu_madd(p3, spu_splats(2.0f), x );
vec_float4 res = spu_mul(pre, divf4(numr, denr));
res = ldexpf4(res, quot);
return spu_sel(spu_sel(res, spu_orc(res,sgnmask), negmask),
zeros,
zeromask);
}
/*
_FUNC_DEF(vec_float4, cbrtf4, (vec_float4 x))
{
vec_uchar16 neg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 res = exp2f4(spu_mul(spu_splats((float)0.3333333333333f), log2f4(absx)));
res = spu_sel(res, spu_or(sbit, res), neg);
return res;
}
*/
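The __calcQuot macro above is an exponent-divide-by-3 without a divide instruction; the same shift-and-correct idea as a scalar sketch (illustrative; assumes arithmetic right shifts on signed ints):

static int div3_sketch(int n)
{
    if (n < 0) n += 2;              /* bias negatives, as in the macro */
    int q = (n >> 2) + (n >> 4);    /* first terms of n/3 = n/4 + n/16 + ... */
    q += q >> 4;                    /* fold in the remaining series terms */
    q += q >> 8;
    q += q >> 16;
    int r = n - 3 * q;              /* residual of the estimate */
    q += (5 * r + 5) >> 4;          /* final correction step */
    return q;
}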

View File

@@ -0,0 +1,94 @@
/* ceild2 - for each of two double slots, round up to the smallest integer not less than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
ceild2(vector double in)
{
vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 one = ((vec_uint4){0, 1, 0, 1});
vec_int4 exp, shift;
vec_uint4 mask, mask_1, frac_mask, addend, insert, pos, equal0, e_0, e_00, e_sign, exp_ge0;
vec_ullong2 sign = spu_splats(0x8000000000000000ULL);
vec_double2 in_hi, out;
vec_double2 one_d = spu_splats((double)1.0);
vec_uint4 zero = spu_splats((unsigned int)0x0);
/* This function generates the following components
 * based upon the inputs:
 *
 *   mask   = bits of the input that need to be replaced.
 *   insert = value of the bits that need to be replaced.
 *   addend = value to be added to perform the function.
 *
 * These are applied as follows:
 *
 *   out = ((in & mask) | insert) + addend
 */
in_hi = spu_shuffle(in, in, splat_hi);
exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF);
shift = spu_sub(((vec_int4){1023, 1043, 1023, 1043}), exp);
/* clamp shift to the range 0 to -31.
*/
shift = spu_sel(spu_splats((int)-32), spu_andc(shift, (vec_int4)spu_cmpgt(shift, 0)), spu_cmpgt(shift, -32));
frac_mask = spu_rlmask(((vec_uint4){0xFFFFF, -1, 0xFFFFF, -1}), shift);
exp_ge0 = spu_cmpgt(exp, 0x3FE);
mask = spu_orc(frac_mask, exp_ge0);
/* addend = ((in & mask) && (in >= 0)) ? mask+1 : 0
*/
mask_1 = spu_addx(mask, one, spu_rlqwbyte(spu_genc(mask, one), 4));
pos = spu_cmpgt((vec_int4)in_hi, -1);
//pos = spu_cmpgt((vec_int4)in_hi, 0x0); // this would also work
equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
addend = spu_andc(spu_and(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));
/* insert
*/
e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
// e_sign = spu_sel(spu_splats((unsigned int)0x0), (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi, spu_splats((unsigned int)0x80000000)), zero));
e_sign = spu_and( (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi,spu_splats((unsigned int)0x80000000)), zero));
insert = spu_andc(spu_andc(e_sign, e_00), exp_ge0);
/* replace insert
*/
in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));
/* in + addend
*/
out = (vec_double2)spu_addx((vec_uint4)in, addend, spu_rlqwbyte(spu_genc((vec_uint4)in, addend), 4));
return (out);
}
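The mask/insert/addend decomposition can be seen more plainly on the raw IEEE-754 bits in a scalar sketch (illustrative only; memcpy type punning replaces the SPU shuffles and carry intrinsics):

#include <stdint.h>
#include <string.h>

static double ceild_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    int exp = (int)((bits >> 52) & 0x7FF) - 1023;
    if (exp >= 52)                       /* already integral, inf, or NaN */
        return x;
    if (exp < 0)                         /* |x| < 1: result is -0.0, 0.0, or 1.0 */
        return (bits >> 63) ? -0.0 : (x == 0.0 ? x : 1.0);
    uint64_t frac_mask = 0x000FFFFFFFFFFFFFull >> exp;   /* "mask" */
    if ((bits & frac_mask) == 0)
        return x;                        /* no fraction bits set */
    if (!(bits >> 63))
        bits += frac_mask + 1;           /* "addend": carry up for positive x */
    bits &= ~frac_mask;                  /* clear the fraction ("insert" zeros) */
    memcpy(&x, &bits, sizeof bits);
    return x;
}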

View File

@@ -0,0 +1,54 @@
/* ceilf4 - for each of four float slots, round up to smallest integer not less than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
ceilf4 (vector float x)
{
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
// Find truncated value and one greater.
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, 1 );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
// If truncated value is less than input, add one.
return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) );
}
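Scalar sketch of the same truncate-then-bump approach (illustrative; 8388608.0f is 2^23, the 0x4b000000 threshold above):

#include <math.h>

static float ceilf_sketch(float x)
{
    if (!(fabsf(x) < 8388608.0f))   /* out of range (or NaN): already integral */
        return x;
    float t = (float)(int)x;        /* truncate toward zero, as spu_convts does */
    return (x > t) ? t + 1.0f : t;  /* bump by one if truncation went down */
}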

View File

@@ -0,0 +1,39 @@
/* copysignd2 - for each of two double slots, return value with magnitude from x and sign from y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double copysignd2 (vector double x, vector double y)
{
return spu_sel( x, y, spu_splats(0x8000000000000000ull) );
}

View File

@@ -0,0 +1,39 @@
/* copysignf4 - for each of four float slots, return value with magnitude from x and sign from y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
copysignf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_splats(0x80000000) );
}

View File

@@ -0,0 +1,127 @@
/* cosd2 - Computes the cosine of each of two double slots.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "sincos_c.h"
vector double
cosd2 (vector double x)
{
vec_double2 xl,xl2,xl3,res;
vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_double2 tiny = (vec_double2)spu_splats(0x3e40000000000000ull);
// Range reduction using : xl = angle * TwoOverPi;
//
xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744));
// Find the quadrant the angle falls in
// using: q = (int) (ceil(abs(x))*sign(x))
//
xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull)));
vec_float4 xf = spu_roundtf(xl);
vec_int4 q = spu_convts(xf,0);
q = spu_shuffle(q,q,copyEven);
// Compute an offset based on the quadrant that the angle falls in
//
vec_int4 offset = spu_add(spu_splats(1), spu_and(q,spu_splats(0x3)));
// Remainder in range [-pi/4..pi/4]
//
vec_float4 qf = spu_convtf(q,0);
vec_double2 qd = spu_extend(qf);
vec_double2 p1 = spu_nmsub(qd,spu_splats(_SINCOS_KC1D),x);
xl = spu_nmsub(qd,spu_splats(_SINCOS_KC2D),p1);
// Check if |xl| is a really small number
//
vec_double2 absXl = (vec_double2)spu_andc((vec_ullong2)xl, spu_splats(0x8000000000000000ull));
vec_ullong2 isTiny = (vec_ullong2)isgreaterd2(tiny,absXl);
// Compute x^2 and x^3
//
xl2 = spu_mul(xl,xl);
xl3 = spu_mul(xl2,xl);
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0f + xl2 * ((((((c0 * xl2 + c1) * xl2 + c2) * xl2 + c3) * xl2 + c4) * xl2 + c5), and
// sx = xl + xl3 * (((((s0 * xl2 + s1) * xl2 + s2) * xl2 + s3) * xl2 + s4) * xl2 + s5)
//
vec_double2 ct0 = spu_mul(xl2,xl2);
vec_double2 ct1 = spu_madd(spu_splats(_SINCOS_CC0D),xl2,spu_splats(_SINCOS_CC1D));
vec_double2 ct2 = spu_madd(spu_splats(_SINCOS_CC2D),xl2,spu_splats(_SINCOS_CC3D));
vec_double2 ct3 = spu_madd(spu_splats(_SINCOS_CC4D),xl2,spu_splats(_SINCOS_CC5D));
vec_double2 st1 = spu_madd(spu_splats(_SINCOS_SC0D),xl2,spu_splats(_SINCOS_SC1D));
vec_double2 st2 = spu_madd(spu_splats(_SINCOS_SC2D),xl2,spu_splats(_SINCOS_SC3D));
vec_double2 st3 = spu_madd(spu_splats(_SINCOS_SC4D),xl2,spu_splats(_SINCOS_SC5D));
vec_double2 ct4 = spu_madd(ct2,ct0,ct3);
vec_double2 st4 = spu_madd(st2,ct0,st3);
vec_double2 ct5 = spu_mul(ct0,ct0);
vec_double2 ct6 = spu_madd(ct5,ct1,ct4);
vec_double2 st6 = spu_madd(ct5,st1,st4);
vec_double2 cx = spu_madd(ct6,xl2,spu_splats(1.0));
vec_double2 sx = spu_madd(st6,xl3,xl);
// Small angle approximation: sin(tiny) = tiny, cos(tiny) = 1.0
//
sx = spu_sel(sx,xl,isTiny);
cx = spu_sel(cx,spu_splats(1.0),isTiny);
// Use the cosine when the offset is odd and the sin
// when the offset is even
//
vec_ullong2 mask1 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
res = spu_sel(cx,sx,mask1);
// Flip the sign of the result when (offset mod 4) = 1 or 2
//
vec_ullong2 mask2 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
mask2 = spu_shuffle(mask2,mask2,copyEven);
res = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)res),res,mask2);
// if input = +/-Inf return NAN
//
res = spu_sel(res, nan, isnand2 (x));
// if input = 0 or denorm, return 1.0
//
vec_ullong2 zeroMask = is0denormd2 (x);
res = spu_sel(res,spu_splats(1.0),zeroMask);
return res;
}

View File

@@ -0,0 +1,94 @@
/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "sincos_c.h"
vector float
cosf4 (vector float x)
{
vec_float4 xl,xl2,xl3,res;
vec_int4 q;
// Range reduction using : xl = angle * TwoOverPi;
//
xl = spu_mul(x, spu_splats(0.63661977236f));
// Find the quadrant the angle falls in
// using: q = (int) (ceil(abs(xl))*sign(xl))
//
xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000)));
q = spu_convts(xl,0);
// Compute an offset based on the quadrant that the angle falls in
//
vec_int4 offset = spu_add(spu_splats(1),spu_and(q,spu_splats((int)0x3)));
// Remainder in range [-pi/4..pi/4]
//
vec_float4 qf = spu_convtf(q,0);
vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x);
xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1);
// Compute x^2 and x^3
//
xl2 = spu_mul(xl,xl);
xl3 = spu_mul(xl2,xl);
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
// sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
//
vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1));
vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1));
vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2));
vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2));
vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f));
vec_float4 sx = spu_madd(st2,xl3,xl);
// Use the cosine when the offset is odd and the sin
// when the offset is even
//
vec_uchar16 mask1 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
res = spu_sel(cx,sx,mask1);
// Flip the sign of the result when (offset mod 4) = 1 or 2
//
vec_uchar16 mask2 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
res = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)res),res,mask2);
return res;
}
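Structurally, the routine does the following (scalar sketch, illustrative: sinf/cosf stand in for the polynomial core, and a single-step reduction replaces the two-constant _SINCOS_KC1/_SINCOS_KC2 reduction):

#include <math.h>

static float cosf_sketch(float x)
{
    float xl = x * 0.63661977236f;        /* angle * 2/pi */
    xl += (xl >= 0.0f) ? 0.5f : -0.5f;    /* round to nearest quadrant */
    int q = (int)xl;
    int offset = 1 + (q & 3);             /* cos(x) = sin(x + pi/2) */
    float r = x - (float)q * 1.57079632679489661923f;  /* remainder in [-pi/4, pi/4] */
    float core = (offset & 1) ? cosf(r) : sinf(r);
    return (offset & 2) ? -core : core;   /* flip sign when (offset mod 4) = 2 or 3 */
}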

View File

@@ -0,0 +1,41 @@
/* divd2 - for each of two double slots, divide numer by denom.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
// Equal to numer * recipd2(denom)
// See recipd2 for results of special values.
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
divd2 (vector double numer, vector double denom)
{
return spu_mul( numer, recipd2( denom ) );
}

View File

@@ -0,0 +1,46 @@
/* divf4 - for each of four float slots, divide numer by denom.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
divf4 (vector float numer, vector float denom)
{
// Reciprocal estimate and 1 Newton-Raphson iteration.
// Uses constant of 1.0 + 1 ulp to improve accuracy.
vector float y0, y0numer;
vector float oneish = (vector float)spu_splats(0x3f800001);
y0 = spu_re( denom );
y0numer = spu_mul( numer, y0 );
return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer );
}
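The refinement step is one Newton-Raphson iteration on the reciprocal; as a scalar sketch (illustrative; 1.0f/d stands in for the spu_re hardware estimate):

static float divf_sketch(float n, float d)
{
    float y0 = 1.0f / d;                    /* reciprocal estimate */
    float oneish = 1.0f + 1.1920929e-7f;    /* 0x3f800001: 1.0 plus 1 ulp */
    float q0 = n * y0;
    /* q = q0 + q0*(oneish - d*y0): cancels the estimate's error */
    return (oneish - d * y0) * q0 + q0;
}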

View File

@@ -0,0 +1,109 @@
/* divi4 - for each of four integer slots, compute quotient and remainder of numer/denom.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
divi4_t divi4 (vector signed int numer, vector signed int denom)
{
divi4_t res;
vec_int4 quot, newQuot, shift;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 numerAbs, denomAbs;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
// Determine whether result needs sign change
numerPos = spu_cmpgt( numer, -1 );
denomPos = spu_cmpgt( denom, -1 );
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos );
denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos );
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
denomZeros = spu_cntlz( denomAbs );
numerZeros = spu_cntlz( numerAbs );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = spu_sl( denomAbs, (vec_uint4)shift );
oneShifted = spu_sl( (vec_uint4)spu_splats(1), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, (vec_uint4)spu_splats(0), spu_cmpeq( denom, 0 ) );
// Shift left all leading zeros.
denomLeft = spu_sl( denomAbs, denomZeros );
oneLeft = spu_sl( (vec_uint4)spu_splats(1), denomZeros );
quot = spu_splats(0);
do
{
cont = spu_cmpgt( oneShifted, 0U );
anyCont = spu_extract( spu_gather( cont ), 0 );
newQuot = spu_or( quot, (vec_int4)oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = spu_cmpgt( denomShifted, numerAbs );
newNum = spu_sub( numerAbs, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip );
shift = (vec_int4)spu_sub( skip, numerZeros );
oneShifted = spu_rlmask( oneLeft, shift );
denomShifted = spu_rlmask( denomLeft, shift );
quot = spu_sel( newQuot, quot, skip );
numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = spu_sel( quot, spu_sub( 0, quot ), quotNeg );
res.rem = spu_sel( spu_sub( 0, (vec_int4)numerAbs ), (vec_int4)numerAbs, numerPos );
return res;
}

View File

@@ -0,0 +1,97 @@
/* divu4 - for each of four unsigned integer slots, compute quotient and remainder of numer/denom.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// divu4 - for each of four unsigned integer slots, compute quotient and remainder of numer/denom
// and store in divu4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
divu4_t divu4 (vector unsigned int numer, vector unsigned int denom)
{
divu4_t res;
vec_int4 shift;
vec_uint4 quot, newQuot;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
denomZeros = spu_cntlz( denom );
numerZeros = spu_cntlz( numer );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = spu_sl( denom, (vec_uint4)shift );
oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) );
// Shift left all leading zeros.
denomLeft = spu_sl( denom, denomZeros );
oneLeft = spu_sl( spu_splats(1U), denomZeros );
quot = spu_splats(0U);
do
{
cont = spu_cmpgt( oneShifted, 0U );
anyCont = spu_extract( spu_gather( cont ), 0 );
newQuot = spu_or( quot, oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = spu_cmpgt( denomShifted, numer );
newNum = spu_sub( numer, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip );
shift = (vec_int4)spu_sub( skip, numerZeros );
oneShifted = spu_rlmask( oneLeft, shift );
denomShifted = spu_rlmask( denomLeft, shift );
quot = spu_sel( newQuot, quot, skip );
numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = quot;
res.rem = numer;
return res;
}
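One slot of the loop, as a scalar sketch (illustrative; __builtin_clz is GCC-specific, and this walks one bit per iteration where the SPU version skips ahead by leading-zero counts):

static void udiv_sketch(unsigned int n, unsigned int d,
                        unsigned int *quot, unsigned int *rem)
{
    unsigned int q = 0;
    if (d != 0) {
        /* align the divisor's leading one with the dividend's */
        int shift = __builtin_clz(d) - __builtin_clz(n | 1);
        while (shift >= 0) {
            if ((d << shift) <= n) {     /* subtract when it fits */
                n -= d << shift;
                q |= 1u << shift;
            }
            shift--;
        }
    }
    *quot = q;
    *rem  = n;    /* divide by zero: quotient 0, remainder = numerator */
}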

View File

@@ -0,0 +1,131 @@
/* exp2f4 - for each of four float slots, compute 2 raised to x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/*
 * FUNCTION
 *	vec_float4 _exp2_v(vec_float4 x)
 *
 * DESCRIPTION
 *	_exp2_v computes 2 raised to the input vector x. Computation is
 *	performed by observing that 2^(a+b) = 2^a * 2^b.
 *	We decompose x into a and b (above) by letting
 *	a = ceil(x), b = x - a.
 *
 *	2^a is easily computed by placing a into the exponent
 *	of a floating point number whose mantissa is all zeros.
 *
 *	2^b is computed using the following polynomial approximation
 *	(C. Hastings, Jr, 1955):
 *
 *	           __7__
 *	           \
 *	            \
 *	2^(-x) =    /    Ci*x^i
 *	           /____
 *	            i=0
 *
 *	for x in the range 0.0 to 1.0, where
 *
 *	C0 =  1.0
 *	C1 = -0.9999999995
 *	C2 =  0.4999999206
 *	C3 = -0.1666653019
 *	C4 =  0.0416573475
 *	C5 = -0.0083013598
 *	C6 =  0.0013298820
 *	C7 = -0.0001413161
 *
 *	This function does not handle out-of-range conditions. It
 *	assumes that x is in the range (-128.0, 127.0]. Values outside
 *	this range will produce undefined results.
 */
#define _EXP2F_H_LN2 0.69314718055995f /* ln(2) */
vector float
exp2f4 (vector float x)
{
vec_int4 ix;
vec_uint4 overflow, underflow;
vec_float4 frac, frac2, frac4;
vec_float4 exp_int, exp_frac;
vec_float4 result;
vec_float4 hi, lo;
vec_float4 bias;
/* Break the input x into two parts: ceil(x) and x - ceil(x).
*/
bias = (vec_float4)(spu_rlmaska((vec_int4)(x), -31));
bias = (vec_float4)(spu_andc(spu_splats(0x3F7FFFFFu), (vec_uint4)bias));
ix = spu_convts(spu_add(x, bias), 0);
frac = spu_sub(spu_convtf(ix, 0), x);
frac = spu_mul(frac, spu_splats(_EXP2F_H_LN2));
// !!! HRD: replacing unclear and incorrect overflow handling code
//overflow = spu_sel((vec_uint4)spu_splats(0x7FFFFFFF), (vec_uint4)x, (vec_uchar16)spu_splats(0x80000000));
overflow = spu_cmpgt(x, (vec_float4)spu_splats(0x4300FFFFu)); // !!! Biggest possible exponent to fit in range.
underflow = spu_cmpgt(spu_splats(-126.0f), x);
//exp_int = (vec_float4)(spu_sl(spu_add(ix, 127), 23)); // !!! HRD <- changing this to correct for
// !!! overflow (x >= 127.999999f)
exp_int = (vec_float4)(spu_sl(spu_add(ix, 126), 23)); // !!! HRD <- add with saturation
exp_int = spu_add(exp_int, exp_int); // !!! HRD
/* Instruction counts can be reduced if the polynomial is
 * computed entirely from nested (dependent) fma's. However,
 * to reduce the number of pipeline stalls, the polynomial is evaluated
 * in two halves (hi and lo).
 */
frac2 = spu_mul(frac, frac);
frac4 = spu_mul(frac2, frac2);
hi = spu_madd(frac, spu_splats(-0.0001413161f), spu_splats(0.0013298820f));
hi = spu_madd(frac, hi, spu_splats(-0.0083013598f));
hi = spu_madd(frac, hi, spu_splats(0.0416573475f));
lo = spu_madd(frac, spu_splats(-0.1666653019f), spu_splats(0.4999999206f));
lo = spu_madd(frac, lo, spu_splats(-0.9999999995f));
lo = spu_madd(frac, lo, spu_splats(1.0f));
exp_frac = spu_madd(frac4, hi, lo);
ix = spu_add(ix, spu_rlmask((vec_int4)(exp_frac), -23));
result = spu_mul(exp_frac, exp_int);
/* Handle overflow */
result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), (vec_uchar16)overflow);
result = spu_sel(result, (vec_float4)spu_splats(0), (vec_uchar16)underflow);
//result = spu_sel(result, (vec_float4)(overflow), spu_cmpgt((vec_uint4)(ix), 255));
return (result);
}
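A scalar sketch of the decomposition (illustrative; ceilf/ldexpf stand in for the bias trick and the exponent-field construction, and the overflow/underflow clamps are omitted):

#include <math.h>

static float exp2f_sketch(float x)
{
    int a = (int)ceilf(x);                          /* 2^x = 2^a * 2^(x-a) */
    float t = ((float)a - x) * 0.69314718055995f;   /* (a-x)*ln2, in [0, ln2) */
    /* hi/lo halves of the polynomial for 2^-(a-x), as in exp2f4 */
    float hi = ((-0.0001413161f * t + 0.0013298820f) * t
                - 0.0083013598f) * t + 0.0416573475f;
    float lo = ((-0.1666653019f * t + 0.4999999206f) * t
                - 0.9999999995f) * t + 1.0f;
    float t2 = t * t;
    float exp_frac = hi * (t2 * t2) + lo;
    return ldexpf(exp_frac, a);                     /* scale by 2^a */
}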

View File

@@ -0,0 +1,63 @@
/* expf4 - for each of four float slots, compute e raised to x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _EXPF_H_C1 ((float)-0.6931470632553101f)
#define _EXPF_H_C2 ((float)-1.1730463525082e-7f)
#define _EXPF_H_INVLN2 ((float)1.4426950408889634f)
vector float
expf4 (vector float x)
{
vec_uchar16 xnegmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 goffset = spu_sel(spu_splats((float) 0.5f),spu_splats((float)-0.5f),xnegmask);
vec_float4 g = spu_mul(x, spu_splats(_EXPF_H_INVLN2));
vec_int4 xexp = spu_convts(spu_add(g, goffset),0);
g = spu_convtf(xexp, 0);
g = spu_madd(g, spu_splats(_EXPF_H_C2), spu_madd(g, spu_splats(_EXPF_H_C1), x));
vec_float4 z = spu_mul(g, g);
vec_float4 a = spu_mul(z, spu_splats((float)0.0999748594f));
vec_float4 b = spu_mul(g,
spu_madd(z,
spu_splats((float)0.0083208258f),
spu_splats((float)0.4999999992f)
)
);
vec_float4 foo = divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
spu_add(spu_splats(1.0f), spu_sub(a, b)));
return ldexpf4(foo, xexp);
}
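For reference, a scalar sketch of the same scheme (illustrative only; expf_sketch is an assumed name): reduce x = g + n*ln(2) using the split constants C1/C2, then evaluate the rational (Pade-style) approximation of e^g.

#include <math.h>

static float expf_sketch(float x)
{
    /* round x/ln(2) to the nearest integer, truncating like spu_convts */
    int   n = (int)(x * 1.4426950408889634f + (x < 0.0f ? -0.5f : 0.5f));
    float g = x - n * 0.6931470632553101f;  /* subtract hi part of ln(2) */
    g       = g - n * 1.1730463525082e-7f;  /* then the lo part          */
    float z = g * g;
    float a = z * 0.0999748594f;
    float b = g * (0.0083208258f * z + 0.4999999992f);
    return ldexpf((1.0f + (a + b)) / (1.0f + (a - b)), n);
}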

View File

@@ -0,0 +1,54 @@
/* expm1f4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _EXPM1F_H_ln1by2 ((float)-0.6931471805599f)
#define _EXPM1F_H_ln3by2 ((float) 0.4054651081082f)
vector float
expm1f4 (vector float x)
{
vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(_EXPM1F_H_ln1by2)),
spu_cmpgt(spu_splats(_EXPM1F_H_ln3by2), x));
vec_float4 x2 = spu_mul(x,x);
vec_float4 d0, d1, n0, n1;
d0 = spu_madd(x , spu_splats((float)-0.3203561199f), spu_splats((float)0.9483177697f));
d1 = spu_madd(x2, spu_splats((float) 0.0326527809f), d0);
n0 = spu_madd(x , spu_splats((float)0.1538026623f), spu_splats((float)0.9483177732f));
n1 = spu_madd(x , spu_splats((float)0.0024490478f), spu_splats((float)0.0305274668f));
n1 = spu_madd(x2, n1, n0);
return spu_sel(spu_sub(expf4(x), spu_splats(1.0f)),
spu_mul(x, divf4(n1, d1)),
nearzeromask);
}
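A scalar sketch of the selection above (illustrative; expm1f_sketch is an assumed name): inside (ln(1/2), ln(3/2)) use the rational approximation x*n(x)/d(x), which avoids the cancellation of exp(x) - 1 near zero; elsewhere fall back to exp(x) - 1.

#include <math.h>

static float expm1f_sketch(float x)
{
    if (x > -0.6931471805599f && x < 0.4054651081082f) {
        float x2 = x * x;
        float d  = 0.0326527809f * x2 + (-0.3203561199f * x + 0.9483177697f);
        float n  = (0.0024490478f * x + 0.0305274668f) * x2
                 + (0.1538026623f * x + 0.9483177732f);
        return x * (n / d);
    }
    return expf(x) - 1.0f;
}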

View File

@@ -0,0 +1,37 @@
/* fabsd2 - for each of two double slots, compute absolute value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double fabsd2 (vector double x)
{
return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
}

View File

@@ -0,0 +1,37 @@
/* fabsf4 - for each of 4 float slots, compute absolute value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float fabsf4 (vector float x)
{
return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) );
}

View File

@@ -0,0 +1,46 @@
/* fdimd2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/* fdim_v - compute the positive difference of x and y.
*/
vector double
fdimd2 (vector double x, vector double y)
{
vec_double2 v;
vec_uint4 mask;
v = spu_sub(x, y);
mask = (vec_uint4)spu_shuffle(v, v, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
v = spu_andc(v, (vec_double2)spu_rlmaska(mask, -31));
return (v);
}
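The shuffle/rlmaska pair above just broadcasts the sign of each doubleword difference and masks negative results to zero; a scalar reference (illustrative, NaN handling aside):

static double fdim_sketch(double x, double y)
{
    return (x > y) ? x - y : 0.0;   /* positive difference, else +0 */
}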

View File

@@ -0,0 +1,38 @@
/* fdimf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fdimf4 (vector float x, vector float y)
{
vec_float4 diff = spu_sub(x,y);
return spu_sel(spu_splats(0.0f),diff, spu_cmpgt(x,y));
}

View File

@@ -0,0 +1,94 @@
/* floord2 - for each of two double slots, round down to the largest integer not greater than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
floord2(vector double in)
{
vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 one = ((vec_uint4){0, 1, 0, 1});
vec_int4 exp, shift;
vec_uint4 mask, mask_1, frac_mask, addend, insert, pos, equal0, e_0, e_00, e_sign, exp_ge0;
vec_ullong2 sign = spu_splats(0x8000000000000000ULL);
vec_double2 in_hi, out;
vec_double2 one_d = spu_splats((double)1.0);
vec_uint4 zero = spu_splats((unsigned int)0x0);
/* This function generates the following component
* based upon the inputs.
*
* mask = bits of the input that need to be replaced.
* insert = value of the bits that need to be replaced
* addend = value to be added to perform function.
*
 * These are applied as follows:
*
* out = ((in & mask) | insert) + addend
*/
in_hi = spu_shuffle(in, in, splat_hi);
exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF);
shift = spu_sub(((vec_int4){1023, 1043, 1023, 1043}), exp);
/* clamp shift to the range 0 to -32.
*/
shift = spu_sel(spu_splats((int)-32), spu_andc(shift, (vec_int4)spu_cmpgt(shift, 0)), spu_cmpgt(shift, -32));
frac_mask = spu_rlmask(((vec_uint4){0xFFFFF, -1, 0xFFFFF, -1}), shift);
exp_ge0 = spu_cmpgt(exp, 0x3FE);
mask = spu_orc(frac_mask, exp_ge0);
/* addend = ((in & mask) && (in < 0)) ? mask+1 : 0
*/
mask_1 = spu_addx(mask, one, spu_rlqwbyte(spu_genc(mask, one), 4));
pos = spu_cmpgt((vec_int4)in_hi, -1);
//pos = spu_cmpgt((vec_int4)in_hi, 0x0); // this also works
equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
addend = spu_andc(spu_andc(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));
/* insert
*/
e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
// e_sign = spu_sel((vec_uint4)one_d, zero, spu_cmpeq( spu_and((vec_uint4)in_hi, spu_splats((unsigned int)0x80000000)), zero));
e_sign = spu_andc( (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi,spu_splats((unsigned int)0x80000000)), zero));
insert =spu_andc(spu_andc(e_sign, e_00), exp_ge0);
/* replace insert
*/
in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));
/* in + addend
*/
out = (vec_double2)spu_addx((vec_uint4)in, addend, spu_rlqwbyte(spu_genc((vec_uint4)in, addend), 4));
return (out);
}
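A worked scalar sketch of the mask/insert/addend scheme above, assuming a 64-bit IEEE-754 layout (floor_sketch is an illustrative name, not part of the library):

#include <stdint.h>
#include <string.h>

static double floor_sketch(double in)
{
    uint64_t bits;
    memcpy(&bits, &in, sizeof bits);
    int neg = (int)(bits >> 63);
    int exp = (int)((bits >> 52) & 0x7FF);
    if (exp >= 0x3FF + 52)                      /* no fraction bits left */
        return in;                              /* (also Inf and NaN)    */
    if (exp < 0x3FF) {                          /* |in| < 1              */
        int nonzero = (bits & 0x7FFFFFFFFFFFFFFFull) != 0;
        return (neg && nonzero) ? -1.0 : (neg ? -0.0 : 0.0);
    }
    uint64_t mask   = 0x000FFFFFFFFFFFFFull >> (exp - 0x3FF);
    uint64_t addend = (neg && (bits & mask)) ? mask + 1 : 0;
    bits = (bits & ~mask) + addend;             /* clear fraction, step  */
    memcpy(&in, &bits, sizeof in);              /* down if negative      */
    return in;
}

For example, floor(-1.5): clearing the fraction bits leaves the pattern of -1.0, and adding mask+1 carries into the exponent field, giving -2.0.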

View File

@@ -0,0 +1,54 @@
/* floorf4 - for each of four float slots, round down to largest integer not greater than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
floorf4 (vector float x)
{
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
// Find truncated value and one less.
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, -1 );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
// If truncated value is greater than input, subtract one.
return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) );
}
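A scalar sketch of the branchless idea above (floorf_sketch is an illustrative name): truncate, then step down by one exactly when truncation rounded up, i.e. for negative non-integral inputs.

static float floorf_sketch(float x)
{
    if (!(x < 0x1p23f && x > -0x1p23f))   /* 2^23 and up are integral; */
        return x;                         /* also passes NaN through   */
    float t = (float)(int)x;              /* truncate toward zero      */
    return (t > x) ? t - 1.0f : t;
}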

View File

@@ -0,0 +1,37 @@
/* fmad2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
fmad2 (vector double x, vector double y, vector double z)
{
return spu_madd(x,y,z);
}

View File

@@ -0,0 +1,38 @@
/* fmaf4
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fmaf4 (vector float x, vector float y, vector float z)
{
return spu_madd(x,y,z);
}

View File

@@ -0,0 +1,68 @@
/* fmaxd2 - for each of two double slots, compute maximum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/* Return the maximum numeric value of their arguments. If one argument
* is a NaN, fmax returns the other value. If both are NaNs, then a NaN
* is returned.
*/
vector double
fmaxd2 (vector double x, vector double y)
{
vec_ullong2 selector, denorm;
vec_double2 x_offset, y_offset, diff;
vec_uint4 nan_x, abs_x, gt, eq;
vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0};
vec_uint4 infinity = (vec_uint4){0x7FF00000, 0, 0x7FF00000, 0};
vec_uint4 exp0 = (vec_uint4){0x3FF00000, 0, 0x3FF00000, 0};
/* If both x and y are denorm or zero, then set 0x3ff to exponent
*/
denorm = (vec_ullong2)spu_cmpeq(spu_and((vec_uint4)spu_or(x, y), infinity), 0);
x_offset = spu_sel(x, spu_or(x, (vec_double2)exp0), denorm);
y_offset = spu_sel(y, spu_or(y, (vec_double2)exp0), denorm);
/* If x is a NaN, then select y as max
*/
abs_x = spu_andc((vec_uint4)x, sign);
gt = spu_cmpgt(abs_x, infinity);
eq = spu_cmpeq(abs_x, infinity);
nan_x = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));
diff = spu_sub(x_offset, y_offset);
selector = (vec_ullong2)spu_orc(nan_x, spu_cmpgt((vec_int4)diff, -1));
selector = spu_shuffle(selector, selector, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
return spu_sel(x, y, selector);
}
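Illustrative usage of the NaN rule described above (values assumed; __builtin_nan is the GCC builtin):

vec_double2 a = (vec_double2){ __builtin_nan(""), 2.0 };
vec_double2 b = (vec_double2){ 1.0, __builtin_nan("") };
vec_double2 m = fmaxd2(a, b);   /* {1.0, 2.0}: a NaN loses to a number */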

View File

@@ -0,0 +1,40 @@
/* fmaxf4 - for each of four float slots, compute maximum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fmaxf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_cmpgt( y, x ) );
}

View File

@@ -0,0 +1,67 @@
/* fmind2 - for each of two double slots, compute minimum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/* Return the minimum numeric value of their arguments. If one argument
* is a NaN, fmin returns the other value. If both are NaNs, then a NaN
* is returned.
*/
vector double
fmind2 (vector double x, vector double y)
{
vec_ullong2 selector, denorm;
vec_double2 x_offset, y_offset, diff;
vec_uint4 nan_x, abs_x, gt, eq;
vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0};
vec_uint4 infinity = (vec_uint4){0x7FF00000, 0, 0x7FF00000, 0};
vec_uint4 exp0 = (vec_uint4){0x3FF00000, 0, 0x3FF00000, 0};
/* If both x and y are denorm or zero, then set 0x3ff to exponent
*/
denorm = (vec_ullong2)spu_cmpeq(spu_and((vec_uint4)spu_or(x, y), infinity), 0);
x_offset = spu_sel(x, spu_or(x, (vec_double2)exp0), denorm);
y_offset = spu_sel(y, spu_or(y, (vec_double2)exp0), denorm);
/* If x is a NaN, then select y as min
*/
abs_x = spu_andc((vec_uint4)x, sign);
gt = spu_cmpgt(abs_x, infinity);
eq = spu_cmpeq(abs_x, infinity);
nan_x = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));
diff = spu_sub(y_offset, x_offset);
selector = (vec_ullong2)spu_orc(nan_x, spu_cmpgt((vec_int4)diff, -1));
selector = spu_shuffle(selector, selector, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
return spu_sel(x, y, selector);
}

View File

@@ -0,0 +1,40 @@
/* fminf4 - for each of four float slots, compute minimum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fminf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_cmpgt( x, y ) );
}

View File

@@ -0,0 +1,302 @@
/* fmodd2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/*
* a vector is returned that contains the remainder of xi/yi,
 * for corresponding elements of vector double x and vector double y,
* as described below:
* if yi is 0, the result is 0
 * otherwise, the function determines the unique signed integer value i
* such that the returned element is xi - i * yi with the same sign as xi and
* magnitude less than |yi|
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb);
vector double fmodd2(vector double x, vector double y)
{
int shift0, shift1;
vec_uchar16 swap_words = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
vec_uchar16 propagate = (vec_uchar16){4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192};
vec_uchar16 splat_hi = (vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11};
vec_uchar16 merge = (vec_uchar16){8,9,10,11,12,13,14,15, 24,25,26,27,28,29,30,31};
vec_int4 n, shift, power;
vec_uint4 z;
vec_uint4 x_hi, y_hi;
vec_uint4 abs_x, abs_y;
vec_uint4 exp_x, exp_y;
vec_uint4 zero_x, zero_y;
vec_uint4 mant_x, mant_x0, mant_x1, mant_y ;
vec_uint4 norm, denorm, norm0, norm1, denorm0, denorm1;
vec_uint4 result, result0, resultx, cnt, sign, borrow, mask;
vec_uint4 x_7ff, x_inf, x_nan, y_7ff, y_inf, y_nan, is_normal;
vec_uint4 x_is_norm, y_is_norm, frac_x, frac_y, cnt_x, cnt_y, mant_x_norm, mant_y_norm;
vec_uint4 mant_x_denorm0, mant_x_denorm1, mant_x_denorm;
vec_uint4 mant_y_denorm0, mant_y_denorm1, mant_y_denorm;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
sign = spu_and((vec_uint4)x, sign_mask);
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
x_hi = spu_shuffle(abs_x, abs_x, splat_hi);
y_hi = spu_shuffle(abs_y, abs_y, splat_hi);
exp_x = spu_rlmask(x_hi, -20);
exp_y = spu_rlmask(y_hi, -20);
// |y| > |x|
resultx = _vec_gt64(abs_y, abs_x);
//is Inf, is NaN
x_7ff = spu_cmpgt(x_hi, spu_splats((unsigned int)0x7fefffff));
x_inf = _vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
x_nan = spu_andc(x_7ff, x_inf);
y_7ff = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7fefffff));
y_inf = _vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
y_nan = spu_andc(y_7ff, y_inf);
// is zero
zero_x = _vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
zero_y = _vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));
/* Determine ilogb of abs_x and abs_y and
* extract the mantissas (mant_x, mant_y)
*/
/* change form*/
// exponent field of 0 -> !is_normal
// zero inputs are don't-cares here: (x==0, y!=0) falls to the x<y path, (x!=0, y==0) to the y==0 path, and (x==0, y==0) to the resultx path
x_is_norm = spu_cmpgt(x_hi, spu_splats((unsigned int)0x000fffff));
y_is_norm = spu_cmpgt(y_hi, spu_splats((unsigned int)0x000fffff));
frac_x = spu_and((vec_uint4)x, mant_mask);
frac_y = spu_and((vec_uint4)y, mant_mask);
//cntlz(use when denorm)
cnt_x = spu_cntlz(frac_x);
cnt_x = spu_add(cnt_x, spu_and(spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_x = spu_add(spu_shuffle(cnt_x, cnt_x, splat_hi), -11);
cnt_y = spu_cntlz(frac_y);
cnt_y = spu_add(cnt_y, spu_and(spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
cnt_y = spu_add(spu_shuffle(cnt_y, cnt_y, splat_hi), -11);
/*
mant_x_norm = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
mant_y_norm = spu_andc(spu_sel(implied_1, abs_y, mant_mask), zero_y);
*/
//norm
mant_x_norm = spu_or(implied_1, frac_x);
mant_y_norm = spu_or(implied_1, frac_y);
//denorm
shift0 = spu_extract(cnt_x, 0);
shift1 = spu_extract(cnt_x, 2);
mant_x_denorm0 = spu_rlmaskqwbyte((vec_uint4)frac_x, -8);
mant_x_denorm1 = spu_and((vec_uint4)frac_x, ((vec_uint4){0x0,0x0,-1,-1}));
mant_x_denorm0 = spu_slqwbytebc(spu_slqw(mant_x_denorm0, shift0), shift0);
mant_x_denorm1 = spu_slqwbytebc(spu_slqw(mant_x_denorm1, shift1), shift1);
mant_x_denorm = spu_shuffle(mant_x_denorm0, mant_x_denorm1, merge);
// vec_int4 shift_y = (vec_int4)spu_sub(cnt_y, spu_splats((unsigned int)11));
shift0 = spu_extract(cnt_y, 0);
shift1 = spu_extract(cnt_y, 2);
mant_y_denorm0 = spu_rlmaskqwbyte((vec_uint4)frac_y, -8);
mant_y_denorm1 = spu_and((vec_uint4)frac_y, ((vec_uint4){0x0,0x0,-1,-1}));
mant_y_denorm0 = spu_slqwbytebc(spu_slqw(mant_y_denorm0, shift0), shift0);
mant_y_denorm1 = spu_slqwbytebc(spu_slqw(mant_y_denorm1, shift1), shift1);
mant_y_denorm = spu_shuffle(mant_y_denorm0, mant_y_denorm1, merge);
// mant_x, mant_y( norm | denorm )
mant_x = spu_sel(mant_x_denorm, mant_x_norm, x_is_norm);
mant_y = spu_sel(mant_y_denorm, mant_y_norm, y_is_norm);
/* power
*/
vec_int4 power_x_norm = (vec_int4)exp_x;
vec_int4 power_x_denorm = spu_sub(spu_splats((int)1), (vec_int4)cnt_x);
vec_int4 power_x = spu_sel(power_x_denorm, power_x_norm, x_is_norm);
vec_int4 power_y_norm = (vec_int4)exp_y;
vec_int4 power_y_denorm = spu_sub(spu_splats((int)1), (vec_int4)cnt_y);
vec_int4 power_y = spu_sel(power_y_denorm, power_y_norm, y_is_norm);
/* Compute fixed point fmod of mant_x and mant_y. Set the flag,
* result0, to all ones if we detect that the final result is
* ever 0.
*/
result0 = spu_or(zero_x, zero_y);
// n = spu_sub((vec_int4)logb_x, (vec_int4)logb_y); //zhao--
n = spu_sub(power_x, power_y);
mask = spu_cmpgt(n, 0);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
/* Convert the result back to floating point and restore
* the sign. If we flagged the result to be zero (result0),
* zero it. If we flagged the result to equal its input x,
* (resultx) then return x.
*
 * Double precision can produce a denormalized output.
*/
// normal = spu_cmpgt((vec_int4)exp_y, 0);//zhao--
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_and(spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(spu_shuffle(cnt, cnt, splat_hi), -11);
mant_x0 = spu_rlmaskqwbyte(mant_x, -8);
mant_x1 = spu_and(mant_x,((vec_uint4){0x0,0x0,-1,-1}));
power =spu_sub(power_y, (vec_int4)cnt);
is_normal = spu_cmpgt(power, 0);
//norm
shift0 = spu_extract(cnt, 0);
shift1 = spu_extract(cnt, 2);
/*
norm0 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x0, implied_1), shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x1, implied_1), shift1), shift1);
*/
norm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
norm = spu_shuffle(norm0, norm1, merge);
//denorm
/*
shift = spu_add((vec_int4)exp_y, -1);
shift0 = spu_extract(shift, 0);
shift1 = spu_extract(shift, 2);
denorm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
denorm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
*/
shift = spu_add(power, -1);
shift0 = spu_extract(shift, 0);
shift1 = spu_extract(shift, 2);
// printf("result denorm: shift0=%d, shift1=%d\n",shift0, shift1);
denorm0 = spu_rlmaskqwbytebc(spu_rlmaskqw(norm0, shift0), 7+shift0);
denorm1 = spu_rlmaskqwbytebc(spu_rlmaskqw(norm1, shift1), 7+shift1);
denorm = spu_shuffle(denorm0, denorm1, merge);
// merge
mant_x = spu_sel(denorm, norm, is_normal);
exp_y = (vec_uint4)power;
exp_y = spu_and(spu_rl(exp_y, 20), is_normal);
result = spu_sel(exp_y, spu_or(sign, mant_x),((vec_uint4){0x800FFFFF, -1, 0x800FFFFF, -1}));
//y>x || y<=x
result = spu_sel(spu_andc(result, spu_rlmask(result0, -1)),
(vec_uint4)x, resultx);
//y=+-inf => x
result = spu_sel(result, (vec_uint4)x, y_inf);
//x=+-inf => NaN
result = spu_sel(result, ((vec_uint4){0x7ff80000, 0x0, 0x7ff80000, 0x0}), x_inf);
//y=0 => 0
result = spu_andc(result, zero_y);
//x=NaN or y=NaN => propagate the NaN
result = spu_sel(result, (vec_uint4)x, x_nan);
result = spu_sel(result, (vec_uint4)y, y_nan);
return ((vec_double2)result);
}
/*
* extend spu_cmpgt function to 64bit data
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // result valid only in the high word of each doubleword
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // result valid only in the high word so far
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 eq = spu_cmpeq(aa, bb);
return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
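The while loop in fmodd2 above is a classic shift-and-subtract remainder on the two fixed-point mantissas; a scalar sketch of just that inner step (mant_fmod_sketch is an illustrative name):

#include <stdint.h>

static uint64_t mant_fmod_sketch(uint64_t mant_x, uint64_t mant_y, int n)
{
    /* n = unbiased exponent of x minus unbiased exponent of y */
    while (n-- > 0) {
        if (mant_x >= mant_y)
            mant_x -= mant_y;     /* conditional subtract ...        */
        mant_x <<= 1;             /* ... then bring in the next bit  */
    }
    if (mant_x >= mant_y)
        mant_x -= mant_y;         /* final subtract without a shift  */
    return mant_x;                /* remainder, still to be rescaled */
}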

View File

@@ -0,0 +1,86 @@
/* fmodf4 - for each of four float slots, compute remainder of x/y defined as x - truncated_integer(x/y) * y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
vector float
fmodf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, r1, r2, i;
vec_uint4 inrange;
// Find i = truncated_integer(|x/y|)
// If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
// Thus i is either the truncated quotient, one less, or one greater.
q = divf4( x, y );
xabs = fabsf4( x );
yabs = fabsf4( y );
qabs = fabsf4( q );
xabs2 = spu_add( xabs, xabs );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
r1 = spu_nmsub( i1, yabs, xabs2 );
r2 = spu_nmsub( i2, yabs, xabs2 );
i = i0;
i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) );
i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) );
i = copysignf4( i, q );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
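A scalar sketch of the candidate selection described above (fmodf_sketch is an assumed name): the rounded quotient can be off by one in either direction, so probe i-1, i, i+1 and keep the largest i with |x| - i*|y| >= 0.

#include <math.h>

static float fmodf_sketch(float x, float y)
{
    float q = x / y;
    float i = truncf(fabsf(q));
    if (i > 0.0f && fabsf(x) - i * fabsf(y) < 0.0f)
        i -= 1.0f;                               /* quotient too large */
    else if (fabsf(x) - (i + 1.0f) * fabsf(y) >= 0.0f)
        i += 1.0f;                               /* quotient too small */
    return x - copysignf(i, q) * y;
}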

View File

@@ -0,0 +1,94 @@
/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_NAN
#define FP_NAN (0)
#endif
#ifndef FP_INFINITE
#define FP_INFINITE (1)
#endif
#ifndef FP_ZERO
#define FP_ZERO (2)
#endif
#ifndef FP_SUBNORMAL
#define FP_SUBNORMAL (3)
#endif
#ifndef FP_NORMAL
#define FP_NORMAL (4)
#endif
vector signed long long
fpclassifyd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 mask;
vec_llong2 classtype;
vec_uint4 cmpgt, cmpeq;
//FP_NORMAL: normal unless nan, infinity, zero, or denorm
classtype = spu_splats((long long)FP_NORMAL);
//FP_NAN: all-ones exponent and non-zero mantissa
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask );
//FP_INFINITE: all-ones exponent and zero mantissa
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask );
//FP_ZERO: zero exponent and zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero );
cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask );
return classtype;
}
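The even/odd shuffles above just stitch 32-bit compare results into 64-bit predicates; the underlying classification is the usual exponent/mantissa test, sketched here for one scalar double (fpclassify_sketch is an assumed name):

#include <math.h>
#include <stdint.h>
#include <string.h>

static int fpclassify_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    uint64_t expn = bits & 0x7FF0000000000000ull;
    uint64_t mant = bits & 0x000FFFFFFFFFFFFFull;
    if (expn == 0x7FF0000000000000ull)           /* all-ones exponent */
        return mant ? FP_NAN : FP_INFINITE;
    if (expn == 0)                               /* zero exponent     */
        return mant ? FP_SUBNORMAL : FP_ZERO;
    return FP_NORMAL;
}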

View File

@@ -0,0 +1,78 @@
/* fpclassifyf4 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_NAN
#define FP_NAN (0)
#endif
#ifndef FP_INFINITE
#define FP_INFINITE (1)
#endif
#ifndef FP_ZERO
#define FP_ZERO (2)
#endif
#ifndef FP_SUBNORMAL
#define FP_SUBNORMAL (3)
#endif
#ifndef FP_NORMAL
#define FP_NORMAL (4)
#endif
vector signed int
fpclassifyf4 (vector float x)
{
vec_uint4 zero = spu_splats((unsigned int)0x00000000);
vec_uint4 mask;
vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff);
vec_int4 classtype = (vec_int4)zero;
//FP_NAN: NaN not supported on SPU, never return FP_NAN
//FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE
//FP_ZERO: zero exponent and zero mantissa
mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero );
classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask );
unclassified = spu_andc( unclassified, mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ),
spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) );
classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask );
unclassified = spu_andc( unclassified, mask );
//FP_NORMAL: none of the above
classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified );
return classtype;
}

View File

@@ -0,0 +1,95 @@
/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef DBL_NAN
#define DBL_NAN ((long long)0x7FF8000000000000ull)
#endif
vector double
frexpd2 (vector double x, vector signed long long *pexp)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_ullong2 e0, x0, x1;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, sh, ex;
vec_double2 fr, frac = (vec_double2)zero;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) );
isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even );
frac = spu_sel( frac, (vec_double2)spu_splats((long long)DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
frac = spu_sel( frac, x , isinf );
//x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
frac = spu_sel( frac, (vec_double2)zero , iszero );
*pexp = spu_sel( *pexp, (vec_llong2)zero , iszero );
//Integer Exponent: if x is normal or subnormal
//...shift left to normalize fraction, zero shift if normal
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) );
sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) );
x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) );
x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) );
fr = (vec_double2)spu_sel( x1, x0, maskdw );
fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn );
fr = spu_sel( fr, x, sign );
e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 );
ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) );
frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) );
*pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) );
return frac;
}
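A scalar sketch of the decomposition above, assuming a 64-bit IEEE-754 layout (frexpd_sketch is an illustrative name): pre-scale subnormals so they acquire a usable exponent field, then force the stored exponent to that of 0.5, so that x = frac * 2^exp with |frac| in [0.5, 1).

#include <stdint.h>
#include <string.h>

static double frexpd_sketch(double x, long long *pexp)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    uint64_t mag = bits & 0x7FFFFFFFFFFFFFFFull;
    if (mag == 0 || mag >= 0x7FF0000000000000ull) {
        *pexp = 0;   /* the vector code leaves *pexp as-is for NaN/Inf */
        return x;    /* zero, NaN, and Inf pass through as the frac    */
    }
    int e = (int)(mag >> 52);
    if (e == 0) {                 /* subnormal: normalize via 2^54     */
        x *= 0x1p54;
        memcpy(&bits, &x, sizeof bits);
        e = (int)((bits >> 52) & 0x7FF) - 54;
    }
    *pexp = e - 1022;
    bits = (bits & 0x800FFFFFFFFFFFFFull) | 0x3FE0000000000000ull;
    memcpy(&x, &bits, sizeof x);
    return x;
}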

View File

@@ -0,0 +1,47 @@
/* frexpf4 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
frexpf4 (vector float x, vector signed int *pexp)
{
vec_int4 zeros = spu_splats((int)0);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 e1 = spu_and((vec_int4)x, expmask);
vec_int4 e2 = spu_sub(spu_rlmask(e1,-23), spu_splats((int)126));
*pexp = spu_sel(e2, zeros, zeromask);
vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
return spu_sel(m2, (vec_float4)zeros, zeromask);
}
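
For reference, a scalar sketch of what each float slot computes (an illustrative assumption, not part of the library): the exponent field is shifted down and rebased, and the exponent of 0.5 is spliced into the original sign and mantissa. The zero case follows the SPU convention seen above, where denormal inputs also compare equal to zero.

#include <string.h>

static float frexpf_ref(float x, int *e)
{
    unsigned int u;
    memcpy(&u, &x, sizeof u);
    if ((u & 0x7F800000u) == 0) {   /* zero (or denormal, flushed on SPU) */
        *e = 0;
        return 0.0f;
    }
    *e = (int)((u >> 23) & 0xFFu) - 126;
    u = (u & ~0x7F800000u) | 0x3F000000u;  /* splice in the exponent of 0.5 */
    memcpy(&x, &u, sizeof x);
    return x;   /* magnitude now in [0.5, 1) */
}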

View File

@@ -0,0 +1,40 @@
/* hypotd2 - for each element of vector x and y, return the square root of (x')^2 + (y')^2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
hypotd2 (vector double x, vector double y)
{
vec_double2 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);
return sqrtd2(sum);
}

View File

@@ -0,0 +1,40 @@
/* hypotf4 - for each element of vector x and y, return the square root of (x')^2 + (y')^2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
hypotf4 (vector float x, vector float y)
{
vec_float4 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);
return sqrtf4(sum);
}
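
A quick spot check with Pythagorean triples (hypothetical usage, not part of the commit). Note that both hypot variants square their inputs directly with no rescaling, so inputs of very large or very small magnitude may overflow or underflow the intermediate sum.

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_float4 hypot_demo(void)
{
    vec_float4 x = (vec_float4){ 3.0f, 5.0f, 8.0f, 7.0f };
    vec_float4 y = (vec_float4){ 4.0f, 12.0f, 15.0f, 24.0f };
    return hypotf4(x, y);   /* expected { 5, 13, 17, 25 } */
}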

View File

@@ -0,0 +1,84 @@
/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_ILOGB0
#define FP_ILOGB0 ((int)0x80000001)
#endif
#ifndef FP_ILOGBNAN
#define FP_ILOGBNAN ((int)0x7FFFFFFF)
#endif
vector signed long long
ilogbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, iszeroinf;
vec_llong2 ilogb = (vec_llong2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan );
//FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ),
spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 );
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) );
return ilogb;
}
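
A scalar reference for one double slot (an illustrative sketch, not part of the library). As the comments above show, this implementation returns FP_ILOGB0 for infinity as well as for zero, which differs from C99 ilogb, where infinity yields INT_MAX.

#include <math.h>

static long long ilogb_ref(double x)
{
    if (isnan(x))              return FP_ILOGBNAN;
    if (x == 0.0 || isinf(x))  return FP_ILOGB0;  /* zero and +/-Inf alike */
    return ilogb(x);   /* unbiased exponent, exact for subnormals too */
}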

View File

@@ -0,0 +1,48 @@
/* ilogbf4 - for each element of vector x, return integer exponent of x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_ILOGB0
#define FP_ILOGB0 ((int)0x80000001)
#endif
vector signed int
ilogbf4 (vector float x)
{
vec_int4 minus127 = spu_splats((int)-127);
vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0);
vec_int4 e2 = spu_add(spu_rlmask(e1,-23), minus127);
return spu_sel(e2, (vec_int4)spu_splats(FP_ILOGB0), zeromask);
}
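
A hypothetical spot check (not from the commit), using the per-file FP_ILOGB0 fallback defined above:

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_int4 ilogb_demo(void)
{
    vec_float4 x = (vec_float4){ 1.0f, 8.0f, 0.75f, 0.0f };
    return ilogbf4(x);   /* expected { 0, 3, -1, FP_ILOGB0 } */
}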

View File

@@ -0,0 +1,39 @@
/* irintf4 - for each of four float slots, round to the nearest integer,
consistent with the current rounding mode.
On SPU, the rounding mode for float is always towards zero.
A vector signed int is returned.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int irintf4(vector float in)
{
return spu_convts(in,0);
}
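
Since spu_convts always converts with truncation, irintf4 is exactly round-toward-zero. A hypothetical spot check:

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_int4 irint_demo(void)
{
    vec_float4 in = (vec_float4){ 1.9f, -1.9f, 2.5f, -2.5f };
    return irintf4(in);   /* truncation: { 1, -1, 2, -2 } */
}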

View File

@@ -0,0 +1,55 @@
/* iroundf4 - for each of four float slots, round to the nearest integer,
halfway cases are rounded away from zero.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int iroundf4(vector float in)
{
vec_int4 exp, out;
vec_uint4 addend;
/* Add 0.5 (in fixed precision, to eliminate rounding issues). */
exp = spu_sub(125, spu_and(spu_rlmask((vec_int4)in, -23), 0xFF));
addend = spu_and(spu_rlmask( spu_splats((unsigned int)0x1000000), exp),
spu_cmpgt((vec_uint4)exp, -31));
in = (vec_float4)spu_add((vec_uint4)in, addend);
/* Truncate the result.
*/
out = spu_convts(in,0);
return (out);
}
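
The addend is 2^24 shifted right by an exponent-derived count, which in the value domain amounts to adding 0.5 at the input's scale before the truncating convert; slots whose exponents fall outside the useful range get a zero addend. A hypothetical spot check of the resulting round-half-away-from-zero behavior:

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_int4 iround_demo(void)
{
    vec_float4 in = (vec_float4){ 1.5f, -1.5f, 2.4f, -2.6f };
    return iroundf4(in);   /* halfway away from zero: { 2, -2, 2, -3 } */
}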

View File

@@ -0,0 +1,46 @@
/* is0denormd2 - for each of two double slots, if input equals 0 or denorm return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
is0denormd2 (vector double x)
{
vec_double2 xexp;
vec_ullong2 cmp;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) );
cmp = spu_shuffle( cmp, cmp, even );
return cmp;
}

View File

@@ -0,0 +1,37 @@
/* is0denormf4 - for each element of vector x, return a mask of ones if x' is zero or denorm, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
is0denormf4 (vector float x)
{
return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) );
}
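
A hypothetical spot check (not from the commit); 1e-40f is a denormal in IEEE single precision, so its exponent field is zero:

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_uint4 is0denorm_demo(void)
{
    vec_float4 x = (vec_float4){ 0.0f, -0.0f, 1.0f, 1e-40f };
    return is0denormf4(x);   /* expected { ~0u, ~0u, 0u, ~0u } */
}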

View File

@@ -0,0 +1,54 @@
/* isequald2 - for each of two double slots, if x = y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero),
spu_or( isnand2( x ), isnand2( y ) ) );
}
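
Each slot implements the IEEE semantics of the scalar comparison below (a reference sketch, not part of the library): the bothzero term makes +0 and -0 compare equal even though their bit patterns differ, and the isnand2 term forces any comparison involving NaN to report false.

static unsigned long long isequal_ref(double x, double y)
{
    return (x == y) ? ~0ull : 0ull;   /* +0 == -0; NaN != everything */
}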

View File

@@ -0,0 +1,37 @@
/* isequalf4 - for each element of vector x and y, return a mask of ones if x' is equal to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isequalf4 (vector float x, vector float y)
{
return spu_cmpeq(x, y);
}

View File

@@ -0,0 +1,47 @@
/* isfinited2 - for each element of vector x, return a mask of ones if x' is finite, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isfinited2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
//Finite unless NaN or Inf, check for 'not all-ones exponent'
cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
return cmpr;
}

View File

@@ -0,0 +1,40 @@
/* isfinitef4 - for each element of vector x, return a mask of ones if x' is finite, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isfinitef4 (vector float x)
{
(void)x;
// NaN, INF not supported on SPU, result always a mask of ones
return spu_splats((unsigned int)0xffffffff);
}

View File

@@ -0,0 +1,65 @@
/* isgreaterd2 - for each of two double slots, if x > y return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
}
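
The double comparison here is assembled entirely from 32-bit integer compares (presumably because the original SPU offers no double-precision compare instruction; that rationale is our assumption). A 64-bit greater-than is built from the high-word and low-word results, then swapped for less-than when both operands are negative, since the integer ordering of IEEE bit patterns reverses for negative values; the bothzero and isnand2 terms again pin down the +/-0 and NaN cases. Per slot this matches the scalar reference:

static unsigned long long isgreater_ref(double x, double y)
{
    return (x > y) ? ~0ull : 0ull;   /* false for NaN; +0 not greater than -0 */
}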

View File

@@ -0,0 +1,67 @@
/* isgreaterequald2 - for each of two double slots, if x is greater or equal to y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isgreaterequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ),
spu_or( isnand2 ( x ), isnand2 ( y ) ) );
}

View File

@@ -0,0 +1,41 @@
/* isgreaterequalf4 - for each element of vector x and y, return a mask of ones if x' is greater than or equal to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isgreaterequalf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpgt(y, x);
return spu_nor(var, var);
}
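
spu_nor(var, var) is simply bitwise NOT, so x >= y is computed as NOT(y > x). The complement is only valid because SPU single precision has no NaNs (see isnanf4 below); under full IEEE semantics a NaN operand would wrongly report true. A hypothetical spot check:

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_uint4 isge_demo(void)
{
    vec_float4 x = (vec_float4){ 1.0f, 2.0f, 3.0f, -1.0f };
    vec_float4 y = (vec_float4){ 2.0f, 2.0f, 1.0f, -2.0f };
    return isgreaterequalf4(x, y);   /* expected { 0u, ~0u, ~0u, ~0u } */
}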

View File

@@ -0,0 +1,37 @@
/* isgreaterf4 - for each element of vector x and y, return a mask of ones if x' is greater than y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isgreaterf4 (vector float x, vector float y)
{
return spu_cmpgt(x, y);
}

View File

@@ -0,0 +1,47 @@
/* isinfd2 - for each of two double slots, if input equals +Inf or -Inf return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isinfd2 (vector double x)
{
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_double2 xabs;
vec_ullong2 cmp;
xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) );
cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) );
return cmp;
}

View File

@@ -0,0 +1,40 @@
/* isinff4 - for each element of vector x, return a mask of ones if x' is INF, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isinff4 (vector float x)
{
(void)x;
// INF not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}

View File

@@ -0,0 +1,64 @@
/* islessd2 - for each of two double slots, if x < y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)y, (vec_int4)x );
cmpeq_i = spu_cmpeq( (vec_int4)y, (vec_int4)x );
cmpgt_ui = spu_cmpgt( (vec_uint4)y, (vec_uint4)x );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
}

View File

@@ -0,0 +1,66 @@
/* islessequald2 - for each of two double slots, if x <= y return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll),
spu_or( isnand2 ( x ), isnand2 ( y ) ) );
}

View File

@@ -0,0 +1,41 @@
/* islessequalf4 - for each element of vector x and y, return a mask of ones if x' is less than or equal to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessequalf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpgt(x, y);
return spu_nor(var, var);
}

View File

@@ -0,0 +1,37 @@
/* islessf4 - for each element of vector x and y, return a mask of ones if x' is less than y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessf4 (vector float x, vector float y)
{
return spu_cmpgt(y, x);
}

View File

@@ -0,0 +1,55 @@
/* islessgreaterd2 - for each of two double slots, if x is less or greater than y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
}

View File

@@ -0,0 +1,41 @@
/* islessgreaterf4 - for each element of vector x and y, return a mask of ones if x' is less than or greater than y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessgreaterf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpeq(x, y);
return spu_nor(var, var);
}

View File

@@ -0,0 +1,52 @@
/* isnand2 - for each of two double slots, if input is any type of NaN return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isnand2 (vector double x)
{
vec_double2 xneg;
vec_ullong2 cmpgt, cmpeq, cmpnan;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uint4 expmask = (vec_uint4)spu_splats(0xfff0000000000000ull);
xneg = (vec_double2)spu_or( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)xneg, expmask );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)xneg, expmask );
cmpnan = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
return cmpnan;
}
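
A hedged usage sketch (hypothetical, not part of this commit): the mask composes with spu_sel, for instance to squash NaN slots to 0.0 before further arithmetic.

#include <simdmath.h>
#include <spu_intrinsics.h>
/* Hypothetical helper: replace any NaN slot of x with 0.0. */
static inline vector double
zero_nans (vector double x)
{
  vector unsigned long long nan_mask = isnand2(x);
  return spu_sel(x, spu_splats(0.0), nan_mask);
}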

View File

@@ -0,0 +1,40 @@
/* isnanf4 - for each element of vector x, return a mask of ones if x' is NaN, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isnanf4 (vector float x)
{
(void)x;
// NaN not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}

View File

@@ -0,0 +1,49 @@
/* isnormald2 - for each element of vector x, return a mask of ones if x' is normal (neither zero, subnormal, infinite, nor NaN), zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isnormald2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
//Normal unless nan, infinite, denorm, or zero
//Check for 'not zero or all-ones exponent'
cmpr = (vec_ullong2)spu_and( spu_cmpgt( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)spu_splats(0x0000000000000000ull) ),
spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
return cmpr;
}
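
A hedged usage sketch (hypothetical, not part of this commit): gate a reciprocal so that zeros, denormals, INFs and NaNs fall back to a caller-supplied default; recipd2 is assumed to come from this same library.

#include <simdmath.h>
#include <spu_intrinsics.h>
/* Hypothetical helper: 1/x where x is normal, 'fallback' elsewhere. */
static inline vector double
guarded_recip (vector double x, vector double fallback)
{
  vector unsigned long long ok = isnormald2(x);
  return spu_sel(fallback, recipd2(x), ok);
}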

View File

@@ -0,0 +1,38 @@
/* isnormalf4 - for each element of vector x, return a mask of ones if x' is normal (neither zero, subnormal, infinite, nor NaN), zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isnormalf4 (vector float x)
{
// NaN, INF not supported on SPU; normal unless zero
return spu_cmpabsgt(x, (vector float)spu_splats(0x00000000));
}

View File

@@ -0,0 +1,63 @@
/* isunorderedd2 - for each element of vector x and y, return a mask of ones if x' is unordered to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isunorderedd2 (vector double x, vector double y)
{
vec_double2 neg;
vec_ullong2 cmpgt, cmpeq, cmpnanx, cmpnany;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_ullong2 expn = (vec_ullong2)spu_splats(0xfff0000000000000ull);
vec_ullong2 sign = (vec_ullong2)spu_splats(0x8000000000000000ull);
//Check if x is nan
neg = (vec_double2)spu_or( (vec_ullong2)x, sign );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
cmpnanx = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
//Check if y is nan
neg = (vec_double2)spu_or( (vec_ullong2)y, sign );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
cmpnany = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
return spu_or( cmpnanx, cmpnany );
}
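
A hedged sketch of the usual idiom (hypothetical, not part of this commit): mask out unordered lanes before acting on a comparison, as scalar code does with isunordered(); isgreaterd2 is assumed to come from this same library.

#include <simdmath.h>
#include <spu_intrinsics.h>
/* Hypothetical helper: per-element max(x, y) that returns y whenever
 * the pair is unordered (either input NaN). */
static inline vector double
max_or_y (vector double x, vector double y)
{
  vector unsigned long long unord = isunorderedd2(x, y);
  vector unsigned long long gt = isgreaterd2(x, y);
  /* take x only where x > y and the pair is ordered */
  return spu_sel(y, x, spu_andc(gt, unord));
}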

View File

@@ -0,0 +1,41 @@
/* isunorderedf4 - for each element of vector x and y, return a mask of ones if x' is unordered to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isunorderedf4 (vector float x, vector float y)
{
(void)x;
(void)y;
// NaN not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}

View File

@@ -0,0 +1,263 @@
/* ldexpd2 - Multiply Double by 2 Raised to the Power ex
For large elements of ex (overflow), returns HUGE_VAL
For small elements of ex (underflow), returns 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
ldexpd2(vector double x, vector signed long long ex)
{
vec_int4 e1, e2;
vec_int4 min = spu_splats(-2099);
// vec_int4 min = spu_splats(-2044);
vec_int4 max = spu_splats( 2098);
// vec_int4 max = spu_splats( 2046);
vec_uint4 cmp_min, cmp_max;
vec_uint4 shift = ((vec_uint4){20, 32, 20, 32});
vec_double2 f1, f2;
vec_double2 out;
vec_double2 in = x;
vec_int4 exp_in;
// check input data range
vec_int4 exp0 = spu_shuffle( (vec_int4)ex, (vec_int4)ex, ((vec_uchar16){4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15}));
vec_int4 dmy = spu_shuffle( (vec_int4)spu_splats(0x10000), (vec_int4)ex, ((vec_uchar16){16,1,2,3, 16,1,2,3, 24,1,2,3,24,1,2,3}));
// (-)0xFFFFFFFF80000000 or (+)0x000000007FFFFFFF
vec_int4 msk_range = ((vec_int4){0,0x80000000, 0,0x80000000});
vec_int4 inrange = spu_addx( (vec_int4)ex, msk_range, spu_rlqwbyte(spu_genc((vec_int4)ex, msk_range), 4));
inrange = (vec_int4)spu_cmpeq( inrange, 0 );
inrange = spu_shuffle(inrange,inrange,((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11}));
// select dummy over ranged data or input data
vec_int4 exp = spu_sel( dmy, exp0, (vec_uint4)inrange);
exp_in = exp;
/* Clamp the specified exponent to the range -2044 to 2046.
*/
cmp_min = spu_cmpgt(exp, min);
cmp_max = spu_cmpgt(exp, max);
exp = spu_sel(min, exp, cmp_min);
exp = spu_sel(exp, max, cmp_max);
/* Generate the factors f1 = 2^e1 and f2 = 2^e2
*/
e1 = spu_rlmaska(exp, -1);
e2 = spu_sub(exp, e1);
f1 = (vec_double2)spu_sl(spu_add(e1, 1023), shift);
vec_double2 otmp = spu_mul(x, f1);
vec_uint4 fpscr1 = spu_mffpscr();
f2 = (vec_double2)spu_sl(spu_add(e2, 1023), shift);
out = spu_mul(otmp, f2);
vec_uint4 fpscr2 = spu_mffpscr();
/* Compute the product x * 2^e1 * 2^e2
*/
// out = spu_mul(spu_mul(x, f1), f2);
// check floating point register DENORM bit
vec_uint4 fpscr0, fpscr;
fpscr0 = spu_or(fpscr1, fpscr2);
fpscr = spu_shuffle(fpscr0, fpscr0, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,10,0x80,0x80,0x80,6,0x80,0x80,0x80,0x80,0x80}));
fpscr = spu_or(fpscr0, fpscr);
if ( __builtin_expect(spu_extract(fpscr, 1) == 0, 1) ) return out;
//////////////////////
// Denormalized calc//
//////////////////////
vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8};
vec_uint4 signmask = ((vec_uint4){0x80000000,0,0x80000000,0});
vec_int4 zeros = spu_splats(0);
vec_uchar16 msk_64_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11});
//check input was zero
vec_uint4 x_body = spu_and( (vec_uint4)x, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}));
vec_uint4 x_zero = spu_cmpeq( x_body, (vec_uint4)zeros );
x_zero = spu_and( x_zero, spu_shuffle(x_zero,x_zero,msk_64_eq));
// check Denormalized input
vec_int4 cnt_zero = (vec_int4)spu_cntlz(x_body);
vec_uint4 is_den = (vec_uint4)spu_cmpgt(cnt_zero, 11); // Denormalized data 000XXXXX XXXXXXXX
is_den = spu_shuffle( is_den, is_den, splat_msb);
is_den = spu_sel(is_den, (vec_uint4)zeros, x_zero); // exclude zero from denormalized
// count 0bits for 64bit
vec_uint4 cnt_ex = (vec_uint4)spu_cmpgt(cnt_zero, 31); // Denormalized data 00000000 XXXXXXXX
vec_int4 cnt_z = spu_shuffle( cnt_zero, cnt_zero, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}));
cnt_zero = spu_add(cnt_zero, spu_sel(zeros, cnt_z, cnt_ex));
cnt_zero = spu_shuffle(cnt_zero, cnt_zero, ((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11}));
// extract each 64bit data
x_body = spu_and( (vec_uint4)x, ((vec_uint4){0x000FFFFF,-1,0x000FFFFF,-1}));
vec_uint4 mant0 = spu_shuffle(x_body, x_body, ((vec_uchar16){0,1, 2, 3, 4, 5, 6, 7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 mant1 = spu_shuffle(x_body, x_body, ((vec_uchar16){8,9,10,11,12,13,14,15,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)exp_in, -31);
sign = spu_shuffle(sign, sign, splat_msb);
// set max shift count
vec_int4 sht = spu_add( cnt_zero, ((vec_int4){-11,-64,-11,-64}));
// denorm & exp+ shift left
vec_uint4 cmp = spu_cmpgt( sht, exp_in);
vec_int4 sht_l = spu_sel(sht, exp_in, cmp);
int shtl0 = spu_extract(sht_l, 0);
int shtl1 = spu_extract(sht_l, 2);
vec_uint4 mant0l = spu_slqwbytebc( spu_slqw(mant0, shtl0), shtl0 );
vec_uint4 mant1l = spu_slqwbytebc( spu_slqw(mant1, shtl1), shtl1 );
vec_int4 expp = spu_shuffle(spu_sub(exp_in, sht_l), zeros, ((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11}));
exp0 = spu_sel( expp, exp_in, sign ); // select plus or minus calculation
vec_uint4 mantl = spu_shuffle( mant0l, mant1l, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}));
vec_uint4 mant = spu_sel( mantl, (vec_uint4)x, sign);
exp = spu_sel( exp_in, exp0, is_den ); // select denormalized
x = (vec_double2)spu_sel( (vec_uint4)x, mant, is_den);
//////////////////////////////////////////////////////////////////////////
// from ldexpf4
vec_int4 expmask = ((vec_int4){0x7FF00000, 0, 0x7FF00000, 0});
e1 = spu_and((vec_int4)x, expmask);
e2 = spu_rlmask(e1,-20);
vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 2046);
vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-2044), exp);
minmask = spu_or (minmask, (vec_uchar16)x_zero);
vec_int4 esum = spu_add(e2, exp);
maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 2046));
maxmask = spu_shuffle(maxmask, maxmask, splat_msb);
// maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL)));
minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
minmask = spu_shuffle(minmask, minmask, splat_msb);
// check denorm
vec_uint4 mxmask = spu_and(spu_cmpgt(e2, 0), ((vec_uint4){0x00100000,0,0x00100000,0})); // not denorm
vec_int4 esum2 = spu_sub(esum, (vec_int4)spu_rlmask(mxmask, -20)); // reverse to norm
vec_uint4 mrange = spu_and(spu_cmpgt(zeros, esum2), spu_cmpgt(esum2, -55)); // denorm range
mrange = spu_shuffle(mrange, mrange, splat_msb);
vec_int4 sht_r = spu_sel(spu_splats(-54), esum2, spu_cmpgt(esum2, spu_splats(-54)) );
vec_int4 sht_rh = spu_add( sht_r, ((vec_int4){7,7,7,7}));
x_body = spu_or( x_body, mxmask );
mant0 = spu_shuffle(x_body, x_body, ((vec_uchar16){0,1, 2, 3, 4, 5, 6, 7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
mant1 = spu_shuffle(x_body, x_body, ((vec_uchar16){8,9,10,11,12,13,14,15,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 mant0r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant0, spu_extract(sht_r, 0)), spu_extract(sht_rh,0) );
vec_uint4 mant1r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant1, spu_extract(sht_r, 2)), spu_extract(sht_rh,2) );
#ifdef LDEXPD2_ROUND
// check current round mode
fpscr = spu_shuffle(fpscr2, fpscr2, ((vec_uchar16){0x80,0x80,0x80,0x80,0,1,2,3,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
fpscr0 = spu_and(fpscr, ((vec_uint4){0,0xc00,0,0}));
fpscr1 = spu_and(fpscr, ((vec_uint4){0,0x300,0,0}));
// prepare round data
vec_uint4 rnd0 = spu_slqwbytebc( spu_slqw( mant0r, 31), 31);
vec_uint4 rnd1 = spu_slqwbytebc( spu_slqw( mant1r, 31), 31);
vec_uint4 rnd0w = (vec_uint4)spu_cntb( (vec_uchar16)rnd0 );
vec_uint4 rnd1w = (vec_uint4)spu_cntb( (vec_uchar16)rnd1 );
rnd0w = spu_or( spu_slqwbyte(rnd0w,4), spu_slqwbyte(rnd0w,8));
rnd1w = spu_or( spu_slqwbyte(rnd1w,4), spu_slqwbyte(rnd1w,8));
rnd0 = spu_or( rnd0, rnd0w);
rnd1 = spu_or( rnd1, rnd1w);
// nearest
// check half
vec_uint4 hit0 = spu_cmpeq(rnd0, ((vec_uint4){0,0xc0000000,0,0})); //odd + round out
vec_uint4 hit1 = spu_cmpeq(rnd1, ((vec_uint4){0,0xc0000000,0,0})); //odd + round out
vec_uint4 add0 = spu_sel((vec_uint4)zeros, ((vec_uint4){0,1,0,0}), hit0);
vec_uint4 add1 = spu_sel((vec_uint4)zeros, ((vec_uint4){0,1,0,0}), hit1);
// check greater than half
rnd0 = spu_and( rnd0, ((vec_uint4){0,0x7FFFFFFF,0,0}));
rnd1 = spu_and( rnd1, ((vec_uint4){0,0x7FFFFFFF,0,0}));
hit0 = spu_cmpgt(rnd0, ((vec_uint4){0,0x40000000,0,0}));
hit1 = spu_cmpgt(rnd1, ((vec_uint4){0,0x40000000,0,0}));
add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0);
add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1);
// select if fp0
add0 = spu_sel((vec_uint4)zeros, add0, spu_cmpeq(fpscr0, (vec_uint4)zeros));
add1 = spu_sel((vec_uint4)zeros, add1, spu_cmpeq(fpscr1, (vec_uint4)zeros));
// toward zero do nothing
// upward
sign = spu_rlmaska((vec_uint4)in, -31);
vec_uint4 sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0x80,0x80,0x80,0x80,0,0,0,0,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 sign1 = spu_shuffle(sign, sign, ((vec_uchar16){0x80,0x80,0x80,0x80,8,8,8,8,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 hit0w = spu_cmpgt(rnd0, ((vec_uint4){0,0,0,0}));
vec_uint4 hit1w = spu_cmpgt(rnd1, ((vec_uint4){0,0,0,0}));
hit0 = spu_andc(hit0w, sign0);
hit1 = spu_andc(hit1w, sign1);
hit0 = spu_and(hit0, spu_cmpeq(fpscr0, ((vec_uint4){0,0x800,0,0})));
hit1 = spu_and(hit1, spu_cmpeq(fpscr1, ((vec_uint4){0,0x200,0,0})));
// select if fp2
add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0);
add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1);
// downward
hit0 = spu_and(hit0w, sign0);
hit1 = spu_and(hit1w, sign1);
hit0 = spu_and(hit0, spu_cmpeq(fpscr0, ((vec_uint4){0,0xc00,0,0})));
hit1 = spu_and(hit1, spu_cmpeq(fpscr1, ((vec_uint4){0,0x300,0,0})));
// select if fp3
add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0);
add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1);
// calc round
mant0r = spu_addx(mant0r, add0, spu_rlqwbyte(spu_genc(mant0r, add0), 4));
mant1r = spu_addx(mant1r, add1, spu_rlqwbyte(spu_genc(mant1r, add1), 4));
#endif // LDEXPD2_ROUND
vec_uint4 mantr = spu_shuffle( mant0r, mant1r, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}));
// select right answer
x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_uchar16)expmask);
x = spu_sel(x, (vec_double2)zeros, minmask);
x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), maxmask);
out = (vec_double2)spu_sel((vec_uint4)x , mantr, mrange);
// check Infinity,NaN
vec_uint4 is_inf = spu_cmpeq(e1, expmask);
is_inf = spu_and( is_inf, spu_shuffle(is_inf,is_inf,msk_64_eq));
out = (vec_double2)spu_sel((vec_uint4)out , (vec_uint4)in, is_inf);
out = spu_sel(out, in, (vec_ullong2)signmask);
return out;
}
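
A worked example (hypothetical, not part of this commit): for in-range exponents the function behaves like scalar ldexp applied per slot.

#include <simdmath.h>
#include <spu_intrinsics.h>
#include <stdio.h>
int main (void)
{
  vector double x = (vector double){3.0, 0.5};
  vector signed long long e = (vector signed long long){4LL, -1LL};
  vector double r = ldexpd2(x, e);
  /* expected: 3.0 * 2^4 = 48.0 and 0.5 * 2^-1 = 0.25 */
  printf("%f %f\n", spu_extract(r, 0), spu_extract(r, 1));
  return 0;
}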

View File

@@ -0,0 +1,56 @@
/* ldexpf4 - Multiply Float by 2 Raised to the Power exp
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
ldexpf4 (vector float x, vector signed int exp)
{
vec_int4 zeros = spu_splats(0);
vec_uchar16 expmask = (vec_uchar16)spu_splats((int)0x7F800000);
vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask);
vec_int4 e2 = spu_rlmask(e1,-23);
vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 255);
vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-255), exp);
minmask = spu_or (minmask, (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros));
vec_int4 esum = spu_add(e2, exp);
maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 255));
maxmask = spu_and(maxmask, (vec_uchar16)spu_splats((int)0x7FFFFFFF));
minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
x = spu_sel(x, (vec_float4)spu_sl(esum,23), expmask);
x = spu_sel(x, (vec_float4)zeros, minmask);
//x = spu_sel(x, (vec_float4)spu_splats((int)0xFFFFFFFF), maxmask);
x = spu_sel(x, (vec_float4)maxmask, maxmask);
return x;
}
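
A worked sketch (hypothetical, not part of this commit) of the common, unclamped path:

#include <simdmath.h>
/* Hypothetical check: each slot scales by 2^exp, so {1,2,3,4} with
 * exponents {1,2,3,-1} gives {2, 8, 24, 2}. */
static inline vector float
ldexpf4_example (void)
{
  return ldexpf4((vector float){1.0f, 2.0f, 3.0f, 4.0f},
                 (vector signed int){1, 2, 3, -1});
}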

View File

@@ -0,0 +1,45 @@
/* llabsi2 - returns absolute value of input.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed long long
llabsi2 (vector signed long long in)
{
vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign = spu_shuffle(sign, sign, ((vec_uchar16){ 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
vec_uint4 add_1 = ((vec_uint4){0,1,0,1});
vec_uint4 res = spu_nor((vec_uint4)in, (vec_uint4)in);
res = spu_addx( res, add_1, spu_slqwbyte(spu_genc(res, add_1), 4));
res = spu_sel( (vec_uint4)in, res, sign);
return ((vec_llong2)(res));
}
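
A worked sketch (hypothetical, not part of this commit): the borrow-propagating add above implements a full 64-bit negate, so both 32-bit halves of each slot are handled.

#include <simdmath.h>
/* Hypothetical check: {-5, 7} maps to {5, 7}. */
static inline vector signed long long
llabsi2_example (void)
{
  return llabsi2((vector signed long long){-5LL, 7LL});
}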

View File

@@ -0,0 +1,123 @@
/* Common functions for lldivi2/lldivu2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LLDIV_H__
#define __LLDIV_H__
#include <spu_intrinsics.h>
static inline vector unsigned long long ll_spu_cntlz(vector unsigned long long x);
static inline vector unsigned long long ll_spu_sl(vector unsigned long long x, vector unsigned long long count);
static inline vector unsigned long long ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count);
static inline vector unsigned long long ll_spu_cmpeq_zero(vector unsigned long long x);
static inline vector unsigned long long ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y);
static inline vector unsigned long long ll_spu_sub(vector unsigned long long x, vector unsigned long long y);
static inline vector unsigned long long
ll_spu_cntlz(vector unsigned long long x)
{
vec_uint4 cnt;
cnt = spu_cntlz((vec_uint4)x);
cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4)));
cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11}));
return (vec_ullong2)cnt;
}
static inline vector unsigned long long
ll_spu_sl(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
// shift upper word
x_upper = spu_and(x, mask);
x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1));
x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3));
x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
vec_uint4 cnt_byte;
cnt_byte = spu_add((vec_uint4)count, 7);
// shift upper word
x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1));
x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_andc(x, mask);
x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3));
x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
ll_spu_cmpeq_zero(vector unsigned long long x)
{
vec_uint4 cmp;
cmp = spu_cmpeq((vec_uint4)x, 0);
return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
static inline vector unsigned long long
ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 gt;
gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y);
gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y));
return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vector unsigned long long
ll_spu_sub(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 borrow;
borrow = spu_genb((vec_uint4)x, (vec_uint4)y);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow);
}
#endif // __LLDIV_H__
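
A hedged illustration (hypothetical, not part of this commit): ll_spu_sub chains spu_genb/spu_subx so the borrow crosses the 32-bit word boundary, giving a true 64-bit subtraction per slot.

#include "lldiv.h" /* assumed reachable on the include path */
/* Hypothetical check: {10 - 3, 1 - 2} = {7, 0xFFFFFFFFFFFFFFFF}. */
static inline vector unsigned long long
ll_sub_example (void)
{
  return ll_spu_sub((vector unsigned long long){10ULL, 1ULL},
                    (vector unsigned long long){3ULL, 2ULL});
}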

View File

@@ -0,0 +1,128 @@
/* lldivi2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "lldiv.h"
static inline vector signed long long _negatell2 (vector signed long long x);
static inline vector signed long long
_negatell2 (vector signed long long x)
{
vector signed int zero = (vector signed int){0,0,0,0};
vector signed int borrow;
borrow = spu_genb(zero, (vec_int4)x);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}
// lldivi2 - for each of two signed long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
lldivi2_t lldivi2 (vector signed long long numer, vector signed long long denom)
{
lldivi2_t res;
vec_ullong2 numerAbs, denomAbs;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 denomZeros, numerZeros;
vec_int4 shift;
vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
vec_ullong2 quot, newQuot;
vec_ullong2 newNum, skip, cont;
int anyCont;
// Determine whether result needs sign change
numerPos = spu_cmpgt((vec_int4)numer, -1);
numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
denomPos = spu_cmpgt((vec_int4)denom, -1);
denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_ullong2)spu_sel(_negatell2(numer), numer, (vec_ullong2)numerPos);
denomAbs = (vec_ullong2)spu_sel(_negatell2(denom), denom, (vec_ullong2)denomPos);
// Get difference of leading zeros.
denomZeros = (vec_uint4)ll_spu_cntlz( denomAbs );
numerZeros = (vec_uint4)ll_spu_cntlz( numerAbs );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = ll_spu_sl( denomAbs, (vec_ullong2)shift );
oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denomAbs ) );
// Shift left all leading zeros.
denomLeft = ll_spu_sl( denomAbs, (vec_ullong2)denomZeros );
oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
quot = spu_splats(0ull);
do
{
cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );
newQuot = spu_or( quot, oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = ll_spu_cmpgt( denomShifted, numerAbs );
newNum = ll_spu_sub( numerAbs, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );
oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
quot = spu_sel( newQuot, quot, skip );
numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = spu_sel((vec_llong2)quot, _negatell2((vec_llong2)quot), (vec_ullong2)quotNeg);
res.rem = spu_sel(_negatell2((vec_llong2)numerAbs), (vec_llong2)numerAbs, (vec_ullong2)numerPos);
return res;
}
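
A worked example (hypothetical, not part of this commit): quotients truncate toward zero and the remainder keeps the numerator's sign, matching C semantics.

#include <simdmath.h>
/* Hypothetical check: -7/2 = -3 rem -1, and 9/4 = 2 rem 1. */
static inline lldivi2_t
lldivi2_example (void)
{
  return lldivi2((vector signed long long){-7LL, 9LL},
                 (vector signed long long){2LL, 4LL});
}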

View File

@@ -0,0 +1,98 @@
/* lldivu2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "lldiv.h"
// lldivu2 - for each of two unsigned long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivu2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long denom)
{
lldivu2_t res;
vec_uint4 denomZeros, numerZeros;
vec_int4 shift;
vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
vec_ullong2 quot, newQuot;
vec_ullong2 newNum, skip, cont;
int anyCont;
// Get difference of leading zeros.
denomZeros = (vec_uint4)ll_spu_cntlz( denom );
numerZeros = (vec_uint4)ll_spu_cntlz( numer );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = ll_spu_sl( denom, (vec_ullong2)shift );
oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denom ) );
// Shift left all leading zeros.
denomLeft = ll_spu_sl( denom, (vec_ullong2)denomZeros );
oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
quot = spu_splats(0ull);
do
{
cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );
newQuot = spu_or( quot, oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = ll_spu_cmpgt( denomShifted, numer );
newNum = ll_spu_sub( numer, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );
oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
quot = spu_sel( newQuot, quot, skip );
numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = quot;
res.rem = numer;
return res;
}
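
A worked example (hypothetical, not part of this commit), including the documented divide-by-zero behaviour:

#include <simdmath.h>
/* Hypothetical check: 7/2 = 3 rem 1, and 9/0 gives quot 0, rem 9. */
static inline lldivu2_t
lldivu2_example (void)
{
  return lldivu2((vector unsigned long long){7ULL, 9ULL},
                 (vector unsigned long long){2ULL, 0ULL});
}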

View File

@@ -0,0 +1,110 @@
/* llrintd2 - rounds two doubles to the nearest 64-bit integers,
consistent with the current rounding mode.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions.
// Overflow returns unspecified data.
vector signed long long
llrintd2 (vector double in)
{
int shift0, shift1;
vec_uchar16 splat_msb = ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8});
vec_int4 exp;
vec_uint4 mant, mant0, mant1, sign, mask, borrow;
vec_uint4 implied_one = ((vec_uint4){ 0, 0, 0x00100000, 0});
vec_uint4 exp_mask = ((vec_uint4){-1,-1, 0xFFF00000, 0});
vec_double2 bias;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
// check denormalized
vec_uint4 exp_in = spu_and( (vec_uint4)in, 0x7FF00000 );
vec_uint4 is_denorm = spu_cmpeq( exp_in, 0 );
vec_uint4 ofs = spu_and( ((vec_uint4){0x00100000,0,0x00100000,0}), is_denorm);
// check zero
vec_uint4 abs_x = spu_and((vec_uint4)in, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}));
vec_uint4 is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11})));
ofs = spu_sel( ofs, vec_zero, is_zerox);
vec_double2 xx = (vec_double2)spu_or( (vec_uint4)in, ofs );
/* Round the input according to the current rounding mode.
*/
vec_uint4 is_large = spu_cmpgt( exp_in, 0x43200000 );
is_large = spu_shuffle(is_large,is_large,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
bias = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large);
bias = spu_sel(bias, xx, (vec_ullong2)spu_splats(0x8000000000000000ULL));
// bias = spu_sel((vec_double2)((vec_ullong2)spu_splats(0x4330000000000000ULL)), xx,
// (vec_ullong2)spu_splats(0x8000000000000000ULL));
mant = (vec_uint4)(spu_sub(spu_add(xx, bias), bias));
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)mant, -20), 0x7FF);
exp = spu_add(exp, -1011);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 2);
mask = spu_cmpgt(exp, 0);
mask = spu_shuffle(mask, mask, splat_msb);
/* Align mantissa bits
*/
mant0 = spu_sel(spu_rlmaskqwbyte(mant, -8), implied_one, exp_mask);
mant1 = spu_sel(mant, implied_one, exp_mask);
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant = spu_shuffle(mant0, mant1, ((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
mant = spu_and(mant, mask);
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)xx, -31);
sign = spu_shuffle(sign, sign, splat_msb);
mant = spu_xor(mant, sign);
borrow = spu_genb(mant, sign);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
4,5,6,7, 192,192,192,192,
12,13,14,15, 192,192,192,192}));
mant = spu_subx(mant, sign, borrow);
return ((vec_llong2)(mant));
}
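
A worked sketch (hypothetical, not part of this commit; assumes the SPU's default round-to-nearest-even mode for doubles):

#include <simdmath.h>
/* Hypothetical check under round-to-nearest-even:
 * {2.5, -1.5} rounds to the even integers {2, -2}. */
static inline vector signed long long
llrintd2_example (void)
{
  return llrintd2((vector double){2.5, -1.5});
}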

View File

@@ -0,0 +1,102 @@
/* llrintf4 - rounds four floats to the nearest 64-bit integers.
On SPU the rounding mode for floats is always towards 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions.
// Overflow returns unspecified data.
llroundf4_t
llrintf4 (vector float in)
{
llroundf4_t res;
vec_int4 exp;
vec_uint4 mant0, mant1, mant2, mant3;
vec_uint4 mask, mask0, mask1;
vec_uint4 sign, sign0, sign1;
vec_uint4 borrow0, borrow1;
vec_uint4 res0, res1;
int shift0, shift1, shift2, shift3;
/* Place mantissa bits (including implied most significant
* bit) into the most significant bits of element 3. Elements
* 0, 1, and 2 are zeroed.
*/
mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in,-11), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant1 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -7), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant2 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -3), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant3 = spu_sel( spu_rlqwbyte((vec_uint4)in, 1), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)in, -23), 0xFF);
exp = spu_add(exp, -94);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 1);
shift2 = spu_extract(exp, 2);
shift3 = spu_extract(exp, 3);
/* Align mantissa bits
*/
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant2 = spu_slqwbytebc(spu_slqw(mant2, shift2), shift2);
mant3 = spu_slqwbytebc(spu_slqw(mant3, shift3), shift3);
mask = spu_cmpgt(exp, 0);
mask0 = spu_shuffle(mask, mask, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
mask1 = spu_shuffle(mask, mask, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_shuffle(mant0, mant1,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res1 = spu_shuffle(mant2, mant3,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res0 = spu_and(res0, mask0);
res1 = spu_and(res1, mask1);
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
sign1 = spu_shuffle(sign, sign, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_xor(res0, sign0);
res1 = spu_xor(res1, sign1);
borrow0 = spu_genb(res0, sign0);
borrow1 = spu_genb(res1, sign1);
borrow0 = spu_shuffle(borrow0, borrow0, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
borrow1 = spu_shuffle(borrow1, borrow1, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
res.vll[0] = (vec_llong2)spu_subx(res0, sign0, borrow0);
res.vll[1] = (vec_llong2)spu_subx(res1, sign1, borrow1);
return res;
}
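
A worked sketch (hypothetical, not part of this commit): since single precision on the SPU always rounds toward zero, llrintf4 effectively truncates.

#include <simdmath.h>
/* Hypothetical check: {1.7f, -2.9f, 0.3f, 5.0f} truncates to
 * vll[0] = {1, -2} and vll[1] = {0, 5}. */
static inline llroundf4_t
llrintf4_example (void)
{
  return llrintf4((vector float){1.7f, -2.9f, 0.3f, 5.0f});
}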

View File

@@ -0,0 +1,92 @@
/* llroundd2 - rounds two doubles to the nearest 64-bit integers.
Halfway cases (0.5) are rounded away from 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions.
// Overflow returns unspecified data.
vector signed long long
llroundd2 (vector double in)
{
int shift0, shift1;
vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8};
vec_int4 exp;
vec_uint4 mant, mant0, mant1, sign, mask, borrow, addend;
vec_uint4 implied_one = { 0, 0, 0x00100000, 0};
vec_uint4 exp_mask = { -1, -1,0xFFF00000, 0};
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)in, -20), 0x7FF);
exp = spu_add(exp, -1011);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 2);
mask = spu_cmpgt(exp, 0);
mask = spu_shuffle(mask, mask, splat_msb);
/* Align mantissa bits
*/
mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -8), implied_one, exp_mask);
mant1 = spu_sel((vec_uint4)in, implied_one, exp_mask);
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant = spu_shuffle(mant0, mant1, ((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
mant = spu_and(mant, mask);
/* Perform round by adding 1 if the fraction bits are
* greater than or equal to .5
*/
addend = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend = spu_rlmask(addend, -7);
// addend = spu_and(spu_rlqw(mant, 1), ((vec_uint4){ 0,1,0,1}));
mant = spu_addx(mant, addend, spu_rlqwbyte(spu_genc(mant, addend), 4));
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign = spu_shuffle(sign, sign, splat_msb);
mant = spu_xor(mant, sign);
borrow = spu_genb(mant, sign);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
4,5,6,7, 192,192,192,192,
12,13,14,15, 192,192,192,192}));
mant = spu_subx(mant, sign, borrow);
return ((vec_llong2)(mant));
}
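
A worked sketch (hypothetical, not part of this commit): halfway cases round away from zero, unlike llrintd2's mode-dependent result.

#include <simdmath.h>
/* Hypothetical check: {2.5, -1.5} rounds to {3, -2}. */
static inline vector signed long long
llroundd2_example (void)
{
  return llroundd2((vector double){2.5, -1.5});
}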

View File

@@ -0,0 +1,115 @@
/* llroundf4 - rounds four floats to the nearest 64-bit integers.
Halfway cases (0.5) are rounded away from 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions.
// Overflow returns unspecified data.
llroundf4_t
llroundf4 (vector float in)
{
llroundf4_t res;
vec_int4 exp;
vec_uint4 mant0, mant1, mant2, mant3;
vec_uint4 mask, mask0, mask1;
vec_uint4 sign, sign0, sign1;
vec_uint4 addend0, addend1;
vec_uint4 borrow0, borrow1;
vec_uint4 res0, res1;
int shift0, shift1, shift2, shift3;
/* Place mantissa bits (including implied most significant
* bit) into the most significant bits of element 3. Elements
* 0, 1, and 2 are zeroed.
*/
mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in,-11), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant1 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -7), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant2 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -3), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant3 = spu_sel( spu_rlqwbyte((vec_uint4)in, 1), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)in, -23), 0xFF);
exp = spu_add(exp, -94);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 1);
shift2 = spu_extract(exp, 2);
shift3 = spu_extract(exp, 3);
/* Align mantissa bits
*/
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant2 = spu_slqwbytebc(spu_slqw(mant2, shift2), shift2);
mant3 = spu_slqwbytebc(spu_slqw(mant3, shift3), shift3);
mask = spu_cmpgt(exp, 0);
mask0 = spu_shuffle(mask, mask, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
mask1 = spu_shuffle(mask, mask, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_shuffle(mant0, mant1,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res1 = spu_shuffle(mant2, mant3,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res0 = spu_and(res0, mask0);
res1 = spu_and(res1, mask1);
/* Perform round by adding 1 if the fraction bits are
* greater than or equal to .5
*/
addend0 = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend1 = spu_shuffle(mant2, mant3, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend0 = spu_rlmask(addend0, -7);
addend1 = spu_rlmask(addend1, -7);
// addend0 = spu_and(spu_rlqw(res0, 1), ((vec_uint4){ 0,1,0,1}));
// addend1 = spu_and(spu_rlqw(res1, 1), ((vec_uint4){ 0,1,0,1}));
res0 = spu_addx(res0, addend0, spu_rlqwbyte(spu_genc(res0, addend0), 4));
res1 = spu_addx(res1, addend1, spu_rlqwbyte(spu_genc(res1, addend1), 4));
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
sign1 = spu_shuffle(sign, sign, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_xor(res0, sign0);
res1 = spu_xor(res1, sign1);
borrow0 = spu_genb(res0, sign0);
borrow1 = spu_genb(res1, sign1);
borrow0 = spu_shuffle(borrow0, borrow0, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
borrow1 = spu_shuffle(borrow1, borrow1, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
res.vll[0] = (vec_llong2)spu_subx(res0, sign0, borrow0);
res.vll[1] = (vec_llong2)spu_subx(res1, sign1, borrow1);
return res;
}
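/* For reference, the per-slot semantics in plain scalar C (an illustrative
   sketch, an assumption of this note rather than part of the library):
   round half away from zero, then convert to a 64-bit integer. */
#include <math.h>
static long long llround_reference(float x)
{
    return (long long)(x >= 0.0f ? floorf(x + 0.5f) : ceilf(x - 0.5f));
}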

View File

@@ -0,0 +1,79 @@
/* log10f4 - for each of four float slots, compute the base-10 logarithm.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _LOG10F_H_loga2msb ((float)0.3010299205780f)
#define _LOG10F_H_loga2lsb ((float)7.5085978266e-8f)
#define _LOG10F_H_logaemsb ((float)0.4342944622040f)
#define _LOG10F_H_logaelsb ((float)1.9699272335e-8f)
#define _LOG10F_H_logae ((float)0.4342944819033f)
#define _LOG10F_H_c0 ((float)(0.2988439998f))
#define _LOG10F_H_c1 ((float)(0.3997655209f))
#define _LOG10F_H_c2 ((float)(0.6666679125f))
vector float
log10f4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = divf4 (x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(_LOG10F_H_c0), w, spu_splats(_LOG10F_H_c1));
polyw = spu_madd(polyw , w, spu_splats(_LOG10F_H_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 wnew = spu_convtf(xexp,0);
vec_float4 zz1 = spu_madd(spu_splats(_LOG10F_H_logaemsb), x1,
spu_mul(spu_splats(_LOG10F_H_loga2msb),wnew));
vec_float4 zz2 = spu_madd(spu_splats(_LOG10F_H_logaelsb), x1,
spu_madd(spu_splats(_LOG10F_H_loga2lsb), wnew,
spu_mul(spu_splats(_LOG10F_H_logae), yneg))
);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
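/* The identity behind the constants above, as a scalar sketch (an
   illustrative assumption, not library code): with x = m * 2^e and
   m in [sqrt(0.5), sqrt(2)),
       log10(x) = e*log10(2) + log10(e)*ln(m),
   and each constant is split into a high part (msb) plus a low correction
   (lsb) so the single-precision products lose fewer bits. */
#include <math.h>
static float log10_reference(float x)
{
    int e;
    float m = frexpf(x, &e);                      /* x = m * 2^e, m in [0.5, 1) */
    if (m < 0.70710678f) { m *= 2.0f; e -= 1; }   /* renormalize as the SPU code does */
    return (float)e * 0.30102999566f + 0.43429448190f * logf(m);
}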

View File

@@ -0,0 +1,51 @@
/* log1pf4 - for each of four float slots, compute the natural logarithm of 1 + x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
log1pf4 (vector float x)
{
vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(-0.5f)),
spu_cmpgt(spu_splats(0.5f), x));
vec_float4 x2 = spu_mul(x,x);
vec_float4 d0, d1, n0, n1;
d0 = spu_madd(x , spu_splats((float)1.5934420741f), spu_splats((float)0.8952856868f));
d1 = spu_madd(x , spu_splats((float)0.1198195734f), spu_splats((float)0.8377145063f));
d1 = spu_madd(x2, d1, d0);
n0 = spu_madd(x , spu_splats((float)1.1457993413f), spu_splats((float)0.8952856678f));
n1 = spu_madd(x , spu_splats((float)0.0082862580f), spu_splats((float)0.3394238808f));
n1 = spu_madd(x2, n1, n0);
return spu_sel(logf4(spu_add(x, spu_splats(1.0f))),
spu_mul(x, divf4(n1, d1)),
nearzeromask);
}
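/* Why the near-zero path exists, shown with scalar floats (an illustrative
   assumption, not library code): for |x| well below 2^-24, 1.0f + x rounds
   to exactly 1.0f, so logf(1.0f + x) loses all information, while the
   rational approximation keeps it. */
#include <math.h>
#include <stdio.h>
int main(void)
{
    float x = 1e-10f;
    printf("logf(1+x) = %g\n", logf(1.0f + x));   /* prints 0 */
    printf("log1pf(x) = %g\n", log1pf(x));        /* prints ~1e-10 */
    return 0;
}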

View File

@@ -0,0 +1,71 @@
/* log2f4 - for each of four float slots, compute the base-2 logarithm.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _LOG2F_H_l2emsb ((float)1.4426950216293f)
#define _LOG2F_H_l2elsb ((float)1.9259629911e-8f)
#define _LOG2F_H_l2e ((float)1.4426950408890f)
#define _LOG2F_H_c0 ((float)(0.2988439998f))
#define _LOG2F_H_c1 ((float)(0.3997655209f))
#define _LOG2F_H_c2 ((float)(0.6666679125f))
vector float
log2f4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = divf4(x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(_LOG2F_H_c0), w, spu_splats(_LOG2F_H_c1));
polyw = spu_madd(polyw , w, spu_splats(_LOG2F_H_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 zz1 = spu_madd(spu_splats(_LOG2F_H_l2emsb), x1, spu_convtf(xexp,0));
vec_float4 zz2 = spu_madd(spu_splats(_LOG2F_H_l2elsb), x1,
spu_mul(spu_splats(_LOG2F_H_l2e), yneg)
);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}

View File

@@ -0,0 +1,93 @@
/* logbd2 - for each element of vector x, return the exponent of x (as if normalized) as a floating-point value
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef HUGE_VALL
#define HUGE_VALL __builtin_huge_vall ()
#endif
#ifndef DBL_INF
#define DBL_INF ((long long)0x7FF0000000000000ull)
#endif
#ifndef DBL_NAN
#define DBL_NAN ((long long)0x7FF8000000000000ull)
#endif
vector double
logbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_double2 logb = (vec_double2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_INF), isinf );
//HUGE_VAL: x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)-HUGE_VALL), iszero );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20);
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ),
spu_nor( isnan, spu_or( isinf, iszero ) ) );
return logb;
}
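/* Scalar reference for the special cases handled above (an illustrative
   assumption, not library code): */
#include <math.h>
static double logb_reference(double x)
{
    if (isnan(x)) return x;             /* NaN -> NaN (DBL_NAN above) */
    if (isinf(x)) return INFINITY;      /* +-Inf -> +Inf (DBL_INF above) */
    if (x == 0.0) return -INFINITY;     /* +-0 -> -HUGE_VAL */
    return (double)ilogb(x);            /* unbiased exponent; subnormals normalized */
}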

View File

@@ -0,0 +1,46 @@
/* logbf4 - for each element of vector x, return the exponent of x as a floating-point value
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef HUGE_VALF
#define HUGE_VALF __builtin_huge_valf ()
#endif
vector float
logbf4 (vector float x)
{
vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0);
e1 = spu_sub(e1, spu_splats((int)0x3F800000));
return spu_sel(spu_convtf(e1,23), (vec_float4)spu_splats(-HUGE_VALF), zeromask);
}
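/* Usage sketch (assuming an SPU toolchain; illustrative, not library code):
   the result is floor(log2(|x|)) for normal inputs. */
static vector float logbf4_example(void)
{
    vector float v = (vector float){1.0f, 8.0f, 0.25f, 3.0f};
    return logbf4(v);   /* -> {0.0f, 3.0f, -2.0f, 1.0f} */
}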

View File

@@ -0,0 +1,70 @@
/* logf4 - for each of four slots, calculate the natural log
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _LOGF_H_ln2msb ((float)(0.6931470632553f))
#define _LOGF_H_ln2lsb ((float)(1.1730463525e-7f))
#define _LOGF_H_c0 ((float)(0.2988439998f))
#define _LOGF_H_c1 ((float)(0.3997655209f))
#define _LOGF_H_c2 ((float)(0.6666679125f))
vector float
logf4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = divf4 (x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(_LOGF_H_c0), w, spu_splats(_LOGF_H_c1));
polyw = spu_madd(polyw , w, spu_splats(_LOGF_H_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 wnew = spu_convtf(xexp,0);
vec_float4 zz1 = spu_madd(spu_splats(_LOGF_H_ln2msb), wnew, x1);
vec_float4 zz2 = spu_madd(spu_splats(_LOGF_H_ln2lsb), wnew, yneg);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
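/* The core identity, as a scalar sketch (an illustrative assumption, not
   library code): with x = m * 2^e and z = (m - 1)/(m + 1),
       ln(m) = 2*atanh(z) = 2*(z + z^3/3 + z^5/5 + ...),
   so ln(x) = e*ln(2) + 2*atanh(z). The polynomial in w = z*z above
   approximates this series (c2 ~ 2/3, c1 ~ 2/5), and ln(2) is split into
   msb/lsb parts to recover bits lost in the single-precision product. */
#include <math.h>
static float log_reference(float x)
{
    int e;
    float m = frexpf(x, &e);
    if (m < 0.70710678f) { m *= 2.0f; e -= 1; }
    float z = (m - 1.0f) / (m + 1.0f);
    return (float)e * 0.69314718f + 2.0f * atanhf(z);
}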

View File

@@ -0,0 +1,54 @@
/* modfd2 - for each of two double slots, compute fractional and integral parts.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// Returns fractional part and stores integral part in *iptr.
vector double
modfd2 (vector double x, vector double *iptr)
{
vec_double2 integral, fraction;
vec_uint4 iszero;
vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0};
vec_uchar16 pattern = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
integral = truncd2( x );
// if integral is zero, then fraction is x.
iszero = spu_cmpeq(spu_andc((vec_uint4)integral, sign), 0);
iszero = spu_and(iszero, spu_shuffle(iszero, iszero, pattern));
fraction = spu_sel(spu_sub( x, integral ), x, (vec_ullong2)iszero);
*iptr = integral;
return fraction;
}
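/* Usage sketch (assuming an SPU toolchain; illustrative, not library code): */
static vector double modfd2_example(void)
{
    vector double ip;
    vector double fr = modfd2((vector double){2.75, -1.5}, &ip);
    /* ip -> {2.0, -1.0}, fr -> {0.75, -0.5}; both parts carry the input's sign. */
    return fr;
}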

View File

@@ -0,0 +1,47 @@
/* modff4 - for each of four float slots, compute fractional and integral parts.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// Returns fractional part and stores integral part in *iptr.
vector float
modff4 (vector float x, vector float *iptr)
{
vec_float4 integral, fraction;
integral = truncf4( x );
fraction = spu_sub( x, integral );
*iptr = integral;
return fraction;
}

View File

@@ -0,0 +1,71 @@
/* nearbyintd2 - Round the input to the nearest integer according to
the current rounding mode without raising an inexact exception.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
nearbyintd2(vector double in)
{
vec_uint4 fpscr;
vec_ullong2 sign = ((vec_ullong2){0x8000000000000000ULL,0x8000000000000000ULL});
vec_double2 out, addend;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
fpscr = spu_mffpscr();
// check denormalized
vec_uint4 exp = spu_and( (vec_uint4)in, 0x7FF00000 );
vec_uint4 is_denorm = spu_cmpeq( exp, 0 );
vec_uint4 ofs = spu_and( ((vec_uint4){0x00100000,0,0x00100000,0}), is_denorm);
// check zero
vec_uint4 abs_x = spu_and((vec_uint4)in, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}));
vec_uint4 is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11})));
ofs = spu_sel( ofs, vec_zero, is_zerox);
vec_double2 xx = (vec_double2)spu_or( (vec_uint4)in, ofs );
/* Add 2^52 and then subtract 2^52 to effect a round performed by the
 * hardware. Also preserve the input sign so that negative inputs that
 * round to zero generate a -0.0.
 */
vec_uint4 is_large = spu_cmpgt( exp, 0x43200000 );
is_large = spu_shuffle(is_large,is_large,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
addend = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large);
addend = spu_sel(addend, xx, sign);
out = spu_sel(spu_sub(spu_add(xx, addend), addend), xx, sign);
spu_mtfpscr(fpscr);
return (out);
}
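/* The magic-number trick above, in scalar form (an illustrative assumption,
   not library code): adding 2^52 pushes the fraction bits off the end of a
   double's mantissa, so the add itself performs the round; subtracting 2^52
   recovers the rounded value. */
static double round_via_bias(double x)        /* valid for 0 <= x < 2^52 */
{
    const double bias = 4503599627370496.0;   /* 2^52 */
    volatile double t = x + bias;             /* volatile: keep the rounding add */
    return t - bias;
}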

View File

@@ -0,0 +1,50 @@
/* nearbyintf4 - for each of four float slots, round to the nearest integer,
consistent with the current rounding mode,
without raising an inexact floating-point exception.
On SPU, the rounding mode for float is always towards zero.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float nearbyintf4(vector float x)
{
vector signed int xi;
vector unsigned int inrange;
// Can convert to and from signed integer to truncate values in range [-2^31, 2^31).
// However, no truncation needed if exponent > 22.
inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
return spu_sel( x, spu_convtf( xi, 0 ), inrange );
}
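/* Usage sketch (assuming an SPU toolchain; illustrative, not library code): */
static vector float nearbyintf4_example(void)
{
    /* -> {1.0f, -1.0f, 2.0f, 1e10f}: rounding is toward zero, and inputs
       with exponent > 22 are already integral, so they pass through. */
    return nearbyintf4((vector float){1.7f, -1.7f, 2.5f, 1e10f});
}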

View File

@@ -0,0 +1,38 @@
/* negated2 - for each of two double slots, negate the sign bit.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
negated2 (vector double x)
{
return (vec_double2)spu_xor( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
}

View File

@@ -0,0 +1,38 @@
/* negatef4 - for each of four float slots, negate the sign bit.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float negatef4 (vector float x)
{
return (vec_float4)spu_xor( (vec_uint4)x, spu_splats(0x80000000) );
}

View File

@@ -0,0 +1,39 @@
/* negatei4 - for each of 4 signed int slots, negate the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int
negatei4 (vector signed int x)
{
vector signed int zero = (vector signed int){0,0,0,0};
return spu_sub (zero, x);
}

View File

@@ -0,0 +1,43 @@
/* negatell2 - for each of 2 signed long long slots, negate the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed long long
negatell2 (vector signed long long x)
{
vector signed int zero = (vector signed int){0,0,0,0};
vector signed int borrow;
borrow = spu_genb(zero, (vec_int4)x);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xC0,0xC0,0xC0,0xC0, 12,13,14,15, 0xC0,0xC0,0xC0,0xC0}));
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}
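/* Scalar model of a 64-bit negate built from 32-bit halves (an illustrative
   assumption, not library code): the SPU has no 64-bit integer subtract, so
   a borrow is generated in the low word and propagated into the high word,
   which spu_genb/spu_subx do above. */
#include <stdint.h>
static int64_t negate_ll_reference(int64_t x)
{
    uint32_t lo = (uint32_t)x, hi = (uint32_t)((uint64_t)x >> 32);
    uint32_t nlo = 0u - lo;
    uint32_t borrow = (lo != 0);          /* borrow out of the low word */
    uint32_t nhi = 0u - hi - borrow;
    return (int64_t)(((uint64_t)nhi << 32) | nlo);
}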

View File

@@ -0,0 +1,92 @@
/* nextafterd2 - find the next representable floating-point value after x in the direction of y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
nextafterd2 (vector double xx, vector double yy)
{
vec_uint4 abs_x, abs_y, sign_x, abs_dif;
vec_uint4 is_sub, is_zerox, is_zeroy;
vec_uint4 is_equal, is_infy, is_nany;
vec_uint4 res0, res1, res;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
vec_uint4 vec_one = ((vec_uint4){0,1,0,1});
vec_uint4 vec_m1 = ((vec_uint4){0x80000000,1,0x80000000,1});
vec_uint4 msk_exp = ((vec_uint4){0x7FF00000,0,0x7FF00000,0});
vec_uint4 msk_abs = ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1});
vec_uchar16 msk_all_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11});
// mask sign bit
abs_x = spu_and( (vec_uint4)xx, msk_abs);
abs_y = spu_and( (vec_uint4)yy, msk_abs);
is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox,msk_all_eq));
// -0 exception
sign_x = spu_and((vec_uint4)xx, ((vec_uint4){0x80000000,0,0x80000000,0}));
sign_x = spu_sel(sign_x, vec_zero, is_zerox);
// if same sign |y| < |x| -> decrease
abs_dif = spu_subx(abs_y, abs_x, spu_rlqwbyte(spu_genb(abs_y, abs_x), 4));
is_sub = spu_xor((vec_uint4)yy, sign_x); // not same sign -> decrease
is_sub = spu_or(is_sub, abs_dif);
is_sub = spu_rlmaska(is_sub, -31);
is_sub = spu_shuffle(is_sub,is_sub,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
res0 = spu_addx( abs_x, vec_one, spu_rlqwbyte(spu_genc(abs_x,vec_one),4)); // calc increase
res1 = spu_subx( abs_x, vec_one, spu_rlqwbyte(spu_genb(abs_x,vec_one),4)); // calc decrease
res = spu_sel( res0, res1, is_sub); // select increase or decrease
res = spu_or( res, sign_x); // set sign
// check exception
// 0 -> -1
res = spu_sel(res, vec_m1, spu_and(is_zerox, is_sub));
// check equal (include 0,-0)
is_zeroy = spu_cmpeq( abs_y, vec_zero);
is_zeroy = spu_and( is_zeroy, spu_shuffle(is_zeroy,is_zeroy,msk_all_eq));
is_equal = spu_cmpeq((vec_uint4)xx, (vec_uint4)yy);
is_equal = spu_and(is_equal, spu_shuffle(is_equal,is_equal,msk_all_eq));
is_equal = spu_or(is_equal, spu_and(is_zeroy, is_zerox));
res = spu_sel(res, (vec_uint4)yy, is_equal);
// check nan
is_infy = spu_cmpeq( abs_y, msk_exp);
is_infy = spu_and( is_infy, spu_shuffle(is_infy,is_infy,msk_all_eq));
is_nany = spu_and( abs_y, msk_exp);
is_nany = spu_cmpeq( is_nany, msk_exp);
is_nany = spu_and( is_nany, spu_shuffle(is_nany,is_nany,msk_all_eq));
is_nany = spu_sel( is_nany, vec_zero, is_infy);
res = spu_sel(res, (vec_uint4)yy, is_nany);
return (vec_double2)res;
}
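/* The core idea, in scalar form ignoring NaN and infinities (an illustrative
   assumption, not library code): for finite doubles, stepping the raw bit
   pattern by one moves to the adjacent representable value; the direction
   depends on the signs of x and of y - x. */
#include <stdint.h>
#include <string.h>
static double nextafter_core(double x, double y)
{
    if (x == y) return y;
    uint64_t bits; memcpy(&bits, &x, sizeof bits);
    if (x == 0.0) { bits = 1; if (y < 0.0) bits |= 1ull << 63; }
    else if ((x < y) == (x > 0.0)) bits += 1;  /* step away from zero */
    else bits -= 1;                            /* step toward zero */
    double r; memcpy(&r, &bits, sizeof r);
    return r;
}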

View File

@@ -0,0 +1,66 @@
/* nextafterf4 - for each of four float slots,
   return the next representable value after x in the direction of y;
   if x is equal to y, the result is y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float nextafterf4(vector float x, vector float y)
{
vec_float4 x_not_dec, lala_inc, lala_dec;
vec_uint4 abs_inc_number, abs_dec_number;
vec_uint4 A, B;
//abs_inc, abs_dec
abs_inc_number = spu_sel(spu_splats((unsigned int)0x800000), spu_add((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_cmpabsgt(x, spu_splats((float)0x0)));
abs_dec_number = (vec_uint4)spu_add((vec_float4)spu_sub((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_splats((float)0x0));
//x<= y
A= spu_andc(abs_inc_number, spu_splats((unsigned int)0x80000000));
// in < 0
B= abs_dec_number;
lala_inc = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(spu_splats((float)0x0), x));
// in <=0, abs_inc ( if in==0, set result's sign to -)
//A= spu_or(spu_splats((unsigned int)0x80000000), spu_andc(abs_inc_number, spu_splats((unsigned int)0x80000000)));
A= spu_or(abs_inc_number, spu_splats((unsigned int)0x80000000));
// in > 0
B = abs_dec_number;
lala_dec = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(x, spu_splats((float)0x0)));
x_not_dec = spu_sel(y, lala_inc, spu_cmpgt(y, x));
// (x <= y) || (x > y)
return spu_sel(x_not_dec, lala_dec, spu_cmpgt(x, y));
}

View File

@@ -0,0 +1,72 @@
/* powf4 - for each of four float slots, compute x raised to the power y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
powf4 (vector float x, vector float y)
{
vec_int4 zeros = spu_splats((int)0);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 absy = spu_andc(y, sbit);
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001));
negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros));
vec_float4 res = exp2f4(spu_mul(y, log2f4(absx)));
res = spu_sel(res, spu_or(sbit, res), negmask);
return spu_sel(res, (vec_float4)zeros, zeromask);
}
/*
{
vec_int4 zeros = spu_splats(0);
vec_int4 ones = (vec_int4)spu_splats((char)0xFF);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x);
vec_uchar16 onemask = (vec_uchar16)spu_cmpeq((vec_float4)ones , y);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 absy = spu_andc(y, sbit);
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001));
negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros));
}
*/
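/* Scalar model of the identity used above (an illustrative assumption, not
   library code): pow(x, y) = exp2(y * log2(|x|)), negated when x < 0 and
   trunc(|y|) is odd; other negative-x cases get no special handling. */
#include <math.h>
static float pow_reference(float x, float y)
{
    float r = exp2f(y * log2f(fabsf(x)));
    if (x < 0.0f && (((long long)fabsf(y)) & 1))   /* odd truncated exponent */
        r = -r;
    return r;
}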

View File

@@ -0,0 +1,80 @@
/* recipd2 - for each of two double slots, compute reciprocal.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// Handles exceptional values as follows:
// NaN -> NaN
// (+,-)Inf -> (+,-)0
// (+,-)0 -> (+,-)Inf
// Denormal inputs are treated as zero.
vector double
recipd2 (vector double x)
{
vec_ullong2 expmask, signmask;
vec_double2 one, man, exp, nexp, y1, y2, y3, zero, inf, result;
vec_float4 onef, manf, y0f, y1f;
expmask = spu_splats(0x7ff0000000000000ull);
signmask = spu_splats(0x8000000000000000ull);
onef = spu_splats(1.0f);
one = spu_extend( onef );
// Factor ( mantissa x 2^exponent ) into ( mantissa x 2 ) and ( 2^(exponent-1) ).
// Invert exponent part with subtraction.
exp = spu_and( x, (vec_double2)expmask );
nexp = (vec_double2)spu_sub( (vec_uint4)expmask, (vec_uint4)exp );
man = spu_sel( x, (vec_double2)spu_splats(0x40000000), expmask );
// Compute mantissa part with single and double precision Newton-Raphson steps.
// Then multiply with 2^(1-exponent).
manf = spu_roundtf( man );
y0f = spu_re( manf );
y1f = spu_madd( spu_nmsub( manf, y0f, onef ), y0f, y0f );
y1 = spu_extend( y1f );
y2 = spu_madd( spu_nmsub( man, y1, one ), y1, y1 );
y3 = spu_madd( spu_nmsub( man, y2, one ), y2, y2 );
y3 = spu_mul( y3, nexp );
// Choose iterated result or special value.
zero = spu_and( x, (vec_double2)signmask );
inf = spu_sel( (vec_double2)expmask, x, signmask );
result = spu_sel( y3, zero, isinfd2 ( x ) );
result = spu_sel( result, inf, is0denormd2 ( x ) );
result = spu_sel( result, x, isnand2( x ) );
return result;
}
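/* One Newton-Raphson step in scalar form (an illustrative assumption, not
   library code): each step roughly doubles the number of correct bits, so
   the ~12-bit spu_re estimate plus one single-precision and two
   double-precision steps reach full double accuracy. */
static double nr_recip_step(double x, double y)
{
    return y + (1.0 - x * y) * y;    /* y' = y*(2 - x*y) */
}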

View File

@@ -0,0 +1,45 @@
/* recipf4 - for each of four float slots, compute reciprocal.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float recipf4 (vector float x)
{
// Reciprocal estimate and 1 Newton-Raphson iteration.
// A constant of 1.0 + 1 ulp in the Newton-Raphson step results in exact
// answers for powers of 2, and a slightly smaller relative error bound.
vec_float4 y0;
vec_float4 oneish = (vec_float4)spu_splats(0x3f800001);
y0 = spu_re( x );
return spu_madd( spu_nmsub( x, y0, oneish ), y0, y0 );
}
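/* Usage sketch (assuming an SPU toolchain; illustrative, not library code): */
static vector float recipf4_example(void)
{
    /* -> {0.5f, 0.25f, 2.0f, ~0.33333334f}; exact for the powers of two
       thanks to the 1.0 + 1 ulp constant. */
    return recipf4((vector float){2.0f, 4.0f, 0.5f, 3.0f});
}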

View File

@@ -0,0 +1,313 @@
/* A vector double is returned that contains the remainder xi REM yi,
for the corresponding elements of vector double x and vector double y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _twice(vec_uint4 aa);
vector double
remainderd2(vector double x, vector double yy)
{
vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 y_hi;
vec_uint4 abs_x, abs_yy, abs_2x, abs_2y;
vec_uint4 bias;
vec_uint4 nan_out, overflow;
vec_uint4 result;
vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
vec_uint4 vec_zero = spu_splats((unsigned int)0);
vec_uint4 is_zeroy;
// cut sign
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_yy = spu_andc((vec_uint4)yy, sign_mask);
y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);
// check nan out
is_zeroy = spu_cmpeq(abs_yy, vec_zero);
is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000
nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000
nan_out = spu_or(nan_out, is_zeroy); // y = 0
nan_out = spu_shuffle(nan_out, nan_out, splat_hi);
// make y x2
abs_2y = _twice(abs_yy); // 2 x y
/*
* use fmodd2 function
*/
// get remainder of y x2
// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_2y);
{
vec_double2 y = (vec_double2)abs_2y;
int shiftx0, shiftx1, shifty0, shifty1;
vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192});
// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 n, shift;
vec_uint4 exp_x, exp_y;
// , sign;
// vec_uint4 abs_x, abs_y;
vec_uint4 abs_y;
vec_uint4 mant_x, mant_x0, mant_x1;
vec_uint4 mant_y, mant_y0, mant_y1;
vec_uint4 mant_0, mant_1;
vec_uint4 mant_r, mant_l;
// vec_uint4 result;
vec_uint4 result0, resultx;
vec_uint4 zero_x, zero_y;
vec_uint4 denorm_x, denorm_y;
vec_uint4 cnt, cnt_x, cnt_y;
vec_uint4 shift_x, shift_y;
vec_uint4 adj_x, adj_y;
vec_uint4 z, borrow, mask;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1});
// vec_uint4 vec_zero = spu_splats((unsigned int)0);
// sign = spu_and( (vec_uint4)x, sign_mask);
// abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
exp_x = spu_rlmask(abs_x, -20);
exp_y = spu_rlmask(abs_y, -20);
// get shift count for denorm
cnt_x = spu_cntlz(abs_x);
cnt_y = spu_cntlz(abs_y);
cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
zero_x = spu_cmpgt(cnt_x, 63); // zero ?
zero_y = spu_cmpgt(cnt_y, 63); // zero ?
result0 = spu_or(zero_x, zero_y);
result0 = spu_shuffle(result0, result0, splat_hi);
// 0 - (cnt_x - 11) = 11 - cnt_x
shift_x= spu_add(cnt_x, -11);
shift_y= spu_add(cnt_y, -11);
cnt_x = spu_sub(11, cnt_x);
cnt_y = spu_sub(11, cnt_y);
// count to normalize
adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0));
adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0));
adj_x = spu_shuffle(adj_x, adj_x, splat_hi);
adj_y = spu_shuffle(adj_y, adj_y, splat_hi);
// for denorm
shiftx0 = spu_extract(shift_x, 0);
shiftx1 = spu_extract(shift_x, 2);
shifty0 = spu_extract(shift_y, 0);
shifty1 = spu_extract(shift_y, 2);
mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0);
mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0);
mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1);
mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1);
mant_x = spu_sel(mant_x0, mant_x1, merge_sel);
mant_y = spu_sel(mant_y0, mant_y1, merge_sel);
denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x);
denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y);
mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x);
mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y);
mant_x = spu_or(mant_x, implied_1); // hidden bit
mant_y = spu_or(mant_y, implied_1); // hidden bit
// x < y ?
resultx = _vec_gt64(abs_y, abs_x);
n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y);
mask = spu_cmpgt(n, 0);
mask = spu_andc(mask, resultx);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
// bring back to original range
mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0}));
mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1}));
// for adj_y < 0 exp max=1
shiftx0 = spu_extract(adj_y, 0);
shiftx1 = spu_extract(adj_y, 2);
mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0);
mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1);
mant_r = spu_sel(mant_x0, mant_x1, merge_sel);
// for adj_y >= 0
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(cnt, -11);
cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1
shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y));
shiftx0 = spu_extract(shift, 0);
shiftx1 = spu_extract(shift, 2);
mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0);
mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1);
mant_l = spu_sel(mant_x0, mant_x1, merge_sel);
cnt = spu_sub(adj_y, (vec_uint4)shift);
mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask));
result = spu_sel(mant_l, mant_r, denorm_y);
result = spu_sel(result, vec_zero, result0); // remainder == 0
result = spu_sel(result, abs_x, resultx); // x < y
// result = spu_xor(result, sign); // set sign
// return ((vec_double2)result);
}
// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)));
/* if (2*x > y)
* x -= y
* if (2*x >= y) x -= y
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
// make x2
abs_2x = _twice(abs_x); // 2 x x
bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy
bias = spu_andc(bias, overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // result went negative
// make x2
abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2*x (clear sign first; _twice does not handle negative inputs)
bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31));
bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
/* select final answer
*/
result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign
result = spu_sel(result, val_nan, nan_out); // if nan
return ((vec_double2)result);
}
/*
* subtraction function under limited conditions
*/
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb)
{
// which input is bigger, aa or bb?
vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa
// need denorm calc ?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
/*
* extends spu_cmpgt to 64-bit data
*/
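/*
 * Illustrative note (not in the original source): per 64-bit lane this
 * computes gt64(a,b) = (a.hi > b.hi) || (a.hi == b.hi && a.lo > b.lo).
 * spu_cmpgt compares 32-bit words, so the low-word result is rotated up
 * by four bytes with spu_rlqwbyte and gated by the high-word equality mask.
 */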
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // result valid in the high word slot only
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // valid in high word only
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
/*
* double-format x2 (multiply by two)
*/
static inline vec_uint4 _twice(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// denorm or zero: shift left by 1; normal: add 1 to exponent
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}
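
A minimal usage sketch for remainderd2 (a standalone example, not part of the library source), assuming an SPU toolchain such as spu-gcc with hosted printf support:

#include <stdio.h>
#include <simdmath.h>
#include <spu_intrinsics.h>

int main(void)
{
    /* remainderd2 processes two independent double slots at once. */
    vector double x = (vector double){ 5.3, -7.25 };
    vector double y = (vector double){ 2.0,  3.0 };
    vector double r = remainderd2(x, y);
    /* C99 remainder() semantics per slot:
     * remainder( 5.3,  2.0) = 5.3 - 3*2.0   = -0.7   (5.3/2.0 = 2.65 rounds to 3)
     * remainder(-7.25, 3.0) = -7.25 + 2*3.0 = -1.25
     */
    printf("%f %f\n", spu_extract(r, 0), spu_extract(r, 1));
    return 0;
}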

View File

@@ -0,0 +1,107 @@
/* remainderf4 - for each of four float slots, compute remainder of x/y defined as x - nearest_integer(x/y) * y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
vector float
remainderf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2, yabshalf;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, i, rem;
vec_uint4 inrange, odd0, odd1, odd2, cmp1, cmp2, odd;
// Find i = truncated_integer(|x/y|)
// By the error bounds of divf4, if |x/y| is < 2^20, the quotient is at most off by 1.0.
// Thus the exact truncation is either the truncated quotient, one less, or one greater.
q = divf4( x, y );
xabs = fabsf4( x );
yabs = fabsf4( y );
qabs = fabsf4( q );
xabs2 = spu_add( xabs, xabs );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
odd1 = spu_cmpeq( spu_and( qi1, 1 ), 1 );
odd0 = odd2 = spu_nor( odd1, odd1 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
cmp1 = spu_cmpgt( xabs, spu_nmsub( i1, yabs, xabs2 ) );
cmp2 = spu_cmpgt( xabs, spu_nmsub( i2, yabs, xabs2 ) );
i = i0;
i = spu_sel( i1, i, cmp1 );
i = spu_sel( i2, i, cmp2 );
odd = odd0;
odd = spu_sel( odd1, odd, cmp1 );
odd = spu_sel( odd2, odd, cmp2 );
rem = spu_nmsub( i, yabs, xabs );
// Test whether i or i+1 = nearest_integer(|x/y|)
//
// i+1 is correct if:
//
// rem > 0.5*|y|
// or
// rem = 0.5*|y| and i is odd
yabshalf = spu_mul( yabs, spu_splats(0.5f) );
cmp1 = spu_cmpgt( rem, yabshalf );
cmp2 = spu_and( spu_cmpeq( rem, yabshalf ), odd );
i = spu_sel( i, spu_add( i, spu_splats(1.0f) ), spu_or( cmp1, cmp2 ) );
i = copysignf4( i, q );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
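
A matching sketch for the four-slot float variant (a standalone example, not part of the library source; inputs chosen to stay well within the |x/y| < 2^20 accuracy bound noted above):

#include <stdio.h>
#include <simdmath.h>
#include <spu_intrinsics.h>

int main(void)
{
    vector float x = (vector float){ 5.3f, 7.0f, -3.5f, 9.0f };
    vector float y = (vector float){ 2.0f, 3.0f,  2.0f, 4.0f };
    vector float r = remainderf4(x, y);
    /* Expected per slot (C99 remainderf): -0.7, 1.0, 0.5, 1.0 */
    printf("%f %f %f %f\n", spu_extract(r, 0), spu_extract(r, 1),
           spu_extract(r, 2), spu_extract(r, 3));
    return 0;
}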

View File

@@ -0,0 +1,356 @@
/* remquod2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/*
* This function returns the same vector double result as remainderd2().
* In addition, a vector signed long long is stored in *quo; each
* element's sign is the sign of xi / yi and its magnitude is
* congruent modulo 2^n to the magnitude of the integral quotient
* of xi / yi, where n is an implementation-defined integer
* greater than or equal to 3.
*/
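/*
 * Illustrative example (not in the original source):
 *   remquod2((vector double){9.0, 7.0}, (vector double){4.0, -3.0}, &q)
 * yields remainders {1.0, 1.0} and q = {2, -2}: the low bits of the
 * rounded quotient, carrying the sign of x/y, as with C99 remquo().
 */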
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _twice(vec_uint4 aa);
vector double
remquod2(vector double x, vector double yy, vector signed long long *quo)
{
vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 quotient, quotient0;
vec_uint4 y_hi;
vec_uint4 abs_x, abs_yy, abs_2x, abs_8y, abs_4y, abs_2y;
vec_uint4 bias;
vec_uint4 nan_out, not_ge, quo_pos, overflow;
vec_uint4 result;
vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
vec_uint4 vec_zero = spu_splats((unsigned int)0);
vec_uint4 is_zeroy;
// clear sign bits
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_yy = spu_andc((vec_uint4)yy, sign_mask);
y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);
quo_pos = spu_cmpgt((vec_int4)spu_and((vec_uint4)spu_xor(x, yy), sign_mask), -1);
quo_pos = spu_shuffle(quo_pos, quo_pos, splat_hi);
// check for NaN outputs
is_zeroy = spu_cmpeq(abs_yy, vec_zero);
is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000
nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000
nan_out = spu_or(nan_out, is_zeroy); // y = 0
nan_out = spu_shuffle(nan_out, nan_out, splat_hi);
// make y x8
abs_2y = _twice(abs_yy); // 2 x y
abs_4y = _twice(abs_2y); // 4 x y
abs_8y = _twice(abs_4y); // 8 x y
/*
* use fmodd2 function
*/
// get remainder of y x8
// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_8y);
{
vec_double2 y = (vec_double2)abs_8y;
int shiftx0, shiftx1, shifty0, shifty1;
vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192});
// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 n, shift;
vec_uint4 exp_x, exp_y;
// , sign;
// vec_uint4 abs_x, abs_y;
vec_uint4 abs_y;
vec_uint4 mant_x, mant_x0, mant_x1;
vec_uint4 mant_y, mant_y0, mant_y1;
vec_uint4 mant_0, mant_1;
vec_uint4 mant_r, mant_l;
// vec_uint4 result;
vec_uint4 result0, resultx;
vec_uint4 zero_x, zero_y;
vec_uint4 denorm_x, denorm_y;
vec_uint4 cnt, cnt_x, cnt_y;
vec_uint4 shift_x, shift_y;
vec_uint4 adj_x, adj_y;
vec_uint4 z, borrow, mask;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1});
// vec_uint4 vec_zero = spu_splats((unsigned int)0);
// sign = spu_and( (vec_uint4)x, sign_mask);
// abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
exp_x = spu_rlmask(abs_x, -20);
exp_y = spu_rlmask(abs_y, -20);
// get shift count for denorm
cnt_x = spu_cntlz(abs_x);
cnt_y = spu_cntlz(abs_y);
cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
zero_x = spu_cmpgt(cnt_x, 63); // zero ?
zero_y = spu_cmpgt(cnt_y, 63); // zero ?
result0 = spu_or(zero_x, zero_y);
result0 = spu_shuffle(result0, result0, splat_hi);
// 0 - (cnt_x - 11) = 11 - cnt_x
shift_x= spu_add(cnt_x, -11);
shift_y= spu_add(cnt_y, -11);
cnt_x = spu_sub(11, cnt_x);
cnt_y = spu_sub(11, cnt_y);
// count to normalize
adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0));
adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0));
adj_x = spu_shuffle(adj_x, adj_x, splat_hi);
adj_y = spu_shuffle(adj_y, adj_y, splat_hi);
// for denorm
shiftx0 = spu_extract(shift_x, 0);
shiftx1 = spu_extract(shift_x, 2);
shifty0 = spu_extract(shift_y, 0);
shifty1 = spu_extract(shift_y, 2);
mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0);
mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0);
mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1);
mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1);
mant_x = spu_sel(mant_x0, mant_x1, merge_sel);
mant_y = spu_sel(mant_y0, mant_y1, merge_sel);
denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x);
denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y);
mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x);
mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y);
mant_x = spu_or(mant_x, implied_1); // hidden bit
mant_y = spu_or(mant_y, implied_1); // hidden bit
// x < y ?
resultx = _vec_gt64(abs_y, abs_x);
n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y);
mask = spu_cmpgt(n, 0);
mask = spu_andc(mask, resultx);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
// bring back to original range
mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0}));
mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1}));
// for adj_y < 0 exp max=1
shiftx0 = spu_extract(adj_y, 0);
shiftx1 = spu_extract(adj_y, 2);
mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0);
mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1);
mant_r = spu_sel(mant_x0, mant_x1, merge_sel);
// for adj_y >= 0
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(cnt, -11);
cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1
shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y));
shiftx0 = spu_extract(shift, 0);
shiftx1 = spu_extract(shift, 2);
mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0);
mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1);
mant_l = spu_sel(mant_x0, mant_x1, merge_sel);
cnt = spu_sub(adj_y, (vec_uint4)shift);
mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask));
result = spu_sel(mant_l, mant_r, denorm_y);
result = spu_sel(result, vec_zero, result0); // remainder == 0
result = spu_sel(result, abs_x, resultx); // x < y
// result = spu_xor(result, sign); // set sign
// return ((vec_double2)result);
}
// 8*y overflows when exp(y) >= 7FC (the x8 adds 3 to the exponent), so keep x
// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
/* if (x >= 4*y)
* x -= 4*y
* quotient = 4
* else
* quotient = 0
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FCFFFFF));
not_ge = _vec_gt64(abs_4y, abs_x);
not_ge = spu_or(not_ge, overflow);
abs_x = spu_sel(_sub_d_(abs_x, abs_4y), abs_x, not_ge);
quotient = spu_andc(spu_splats((int)4), (vec_int4)not_ge);
/* if (x >= 2*y)
* x -= 2*y
* quotient += 2
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FDFFFFF));
not_ge = _vec_gt64(abs_2y, abs_x); // abs_2y > abs_x
not_ge = spu_or(not_ge, overflow);
abs_x = spu_sel(_sub_d_(abs_x, abs_2y), abs_x, not_ge);
quotient = spu_sel(spu_add(quotient, 2), quotient, not_ge);
/* if (2*x > y)
* x -= y
* if (2*x >= y) x -= y
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
// make x2
abs_2x = _twice(abs_x); // 2 x x
bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy
bias = spu_andc(bias, overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
quotient = spu_sub(quotient, (vec_int4)bias);
overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // result went negative
// make x2
abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2*x (clear sign first; _twice does not handle negative inputs)
bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31));
bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
quotient = spu_sub(quotient, (vec_int4)bias);
/* select final answer
*/
result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign
result = spu_sel(result, val_nan, nan_out); // if nan
quotient = spu_and(quotient, ((vec_int4){0,7,0,7})); // limit to 3 bits
quotient0 = spu_subx( (vec_int4)vec_zero, quotient, spu_rlqwbyte(spu_genb((vec_int4)vec_zero,quotient),4));
quotient = spu_sel(quotient0, quotient, quo_pos);
*quo = (vec_llong2)quotient;
return ((vec_double2)result);
}
/*
* subtraction function under limited conditions
*/
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb)
{
// which input is bigger, aa or bb?
vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa
// need denorm calc ?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
/*
* extends spu_cmpgt to 64-bit data
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // result valid in the high word slot only
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // valid in high word only
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
/*
* double-format x2 (multiply by two)
*/
static inline vec_uint4 _twice(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// denorm or zero: shift left by 1; normal: add 1 to exponent
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}
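
A minimal usage sketch for remquod2 (a standalone example, not part of the library source; assumes an SPU toolchain with hosted printf):

#include <stdio.h>
#include <simdmath.h>
#include <spu_intrinsics.h>

int main(void)
{
    vector double x = (vector double){ 7.0, -5.0 };
    vector double y = (vector double){ 2.0,  3.0 };
    vector signed long long q;
    vector double r = remquod2(x, y, &q);
    /* remainder(7, 2) = -1 with quotient 4 (7/2 = 3.5 rounds to even 4);
     * remainder(-5, 3) = 1 with quotient -2 (sign of x/y is negative). */
    printf("r = {%f, %f}  q = {%lld, %lld}\n",
           spu_extract(r, 0), spu_extract(r, 1),
           spu_extract(q, 0), spu_extract(q, 1));
    return 0;
}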

Some files were not shown because too many files have changed in this diff.